Merge tag 'drm-intel-gt-next-2020-09-07' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

(Same content as drm-intel-gt-next-2020-09-04-3, S-o-b's added)

UAPI Changes:
(- Potential implicit changes from WW locking refactoring)

Cross-subsystem Changes:
(- WW locking changes should align the i915 locking more with others)

Driver Changes:

- MAJOR: Apply WW locking across the driver (Maarten)

- Reverts for 5 commits to make applying WW locking faster (Maarten)
- Disable preparser around invalidations on Tigerlake for non-RCS engines (Chris)
- Add missing dma_fence_put() for error case of syncobj timeline (Chris)
- Parse command buffer earlier in eb_relocate(slow) to facilitate backoff (Maarten)
- Pin engine before pinning all objects (Maarten)
- Rework intel_context pinning to do everything outside of pin_mutex (Maarten)

- Avoid tracking GEM context until registered (Cc: stable, Chris)
- Provide a fastpath for waiting on vma bindings (Chris)
- Fixes to preempt-to-busy mechanism (Chris)
- Distinguish the virtual breadcrumbs from the irq breadcrumbs (Chris)
- Switch to object allocations for page directories (Chris)
- Hold context/request reference while breadcrumbs are active (Chris)
- Make sure execbuffer always passes ww state to i915_vma_pin (Maarten)

- Code refactoring to facilitate use of WW locking (Maarten)
- Locking refactoring to use more granular locking (Maarten, Chris)
- Support for multiple pinned timelines per engine (Chris)
- Move complication of I915_GEM_THROTTLE to the ioctl from general code (Chris)
- Make active tracking/vma page-directory stash work preallocated (Chris)
- Avoid flushing submission tasklet too often (Chris)
- Reduce context termination list iteration guard to RCU (Chris)
- Reductions to locking contention (Chris)
- Fixes for issues found by CI (Chris)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Joonas Lahtinen <jlahtine@jlahtine-mobl.ger.corp.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200907130039.GA27766@jlahtine-mobl.ger.corp.intel.com
This commit is contained in:
Dave Airlie 2020-09-09 07:53:59 +10:00
commit 1f4b2aca79
82 changed files with 3460 additions and 2204 deletions

View file

@ -2311,7 +2311,7 @@ err:
void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
{
i915_gem_object_lock(vma->obj);
i915_gem_object_lock(vma->obj, NULL);
if (flags & PLANE_HAS_FENCE)
i915_vma_unpin_fence(vma);
i915_gem_object_unpin_from_display_plane(vma);
@ -3451,7 +3451,7 @@ initial_plane_vma(struct drm_i915_private *i915,
if (IS_ERR(vma))
goto err_obj;
if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
goto err_obj;
if (i915_gem_object_is_tiled(obj) &&
@ -17194,7 +17194,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
if (!intel_fb->frontbuffer)
return -ENOMEM;
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
tiling = i915_gem_object_get_tiling(obj);
stride = i915_gem_object_get_stride(obj);
i915_gem_object_unlock(obj);

View file

@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
static int vma_bind(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
static void vma_bind(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
return vm->vma_ops.bind_vma(vm, vma, cache_level, flags);
vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags);
}
static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work)
struct clear_pages_work *w = container_of(work, typeof(*w), work);
struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
struct i915_vma *vma = w->sleeve->vma;
struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
int err = w->dma.error;
@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work)
obj->read_domains = I915_GEM_GPU_DOMAINS;
obj->write_domain = 0;
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (unlikely(err))
i915_gem_ww_ctx_init(&ww, false);
intel_engine_pm_get(w->ce->engine);
retry:
err = intel_context_pin_ww(w->ce, &ww);
if (err)
goto out_signal;
batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_unpin;
goto out_ctx;
}
rq = intel_context_create_request(w->ce);
rq = i915_request_create(w->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@ -224,9 +229,19 @@ out_request:
i915_request_add(rq);
out_batch:
intel_emit_vma_release(w->ce, batch);
out_unpin:
i915_vma_unpin(vma);
out_ctx:
intel_context_unpin(w->ce);
out_signal:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
i915_vma_unpin(w->sleeve->vma);
intel_engine_pm_put(w->ce->engine);
if (unlikely(err)) {
dma_fence_set_error(&w->dma, err);
dma_fence_signal(&w->dma);
@ -234,6 +249,44 @@ out_signal:
}
}
/*
 * Lock and pin the vma that is about to be cleared, queue the clear work
 * behind the fences already tracked in the object's reservation, and then
 * publish the work's own fence as the new exclusive fence.
 *
 * @w:  the clear work; w->sleeve->vma is the target, w->wait collects the
 *      dependencies and w->dma is the fence installed on the object
 * @ce: not used by this helper
 *
 * Returns 0 on success, in which case the vma is left pinned (the worker
 * drops that pin), or a negative error code, in which case the pin taken
 * here has already been released. The object lock is held through the ww
 * context and is released when that context is finalised before returning.
 */
static int pin_wait_clear_pages_work(struct clear_pages_work *w,
				     struct intel_context *ce)
{
	struct i915_vma *vma = w->sleeve->vma;
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err)
		goto out;

	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out;

	/*
	 * Only a negative return is a failure. The await helper may report a
	 * positive value when it queued a wait on a still-busy fence
	 * (NOTE(review): confirm against i915_sw_fence.c); the caller only
	 * treats err < 0 as an error, so a positive value must not unpin the
	 * vma or skip installing the exclusive fence.
	 */
	err = i915_sw_fence_await_reservation(&w->wait,
					      vma->obj->base.resv, NULL,
					      true, 0, I915_FENCE_GFP);
	if (err < 0)
		goto err_unpin_vma;

	dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
	err = 0;

err_unpin_vma:
	if (err)
		i915_vma_unpin(vma);
out:
	/* On ww deadlock, drop every lock held and start over. */
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}
static int __i915_sw_fence_call
clear_pages_work_notify(struct i915_sw_fence *fence,
enum i915_sw_fence_notify state)
@ -287,17 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
i915_sw_fence_init(&work->wait, clear_pages_work_notify);
i915_gem_object_lock(obj);
err = i915_sw_fence_await_reservation(&work->wait,
obj->base.resv, NULL, true, 0,
I915_FENCE_GFP);
if (err < 0) {
err = pin_wait_clear_pages_work(work, ce);
if (err < 0)
dma_fence_set_error(&work->dma, err);
} else {
dma_resv_add_excl_fence(obj->base.resv, &work->dma);
err = 0;
}
i915_gem_object_unlock(obj);
dma_fence_get(&work->dma);
i915_sw_fence_commit(&work->wait);

View file

@ -439,29 +439,36 @@ static bool __cancel_engine(struct intel_engine_cs *engine)
return __reset_engine(engine);
}
static struct intel_engine_cs *__active_engine(struct i915_request *rq)
static bool
__active_engine(struct i915_request *rq, struct intel_engine_cs **active)
{
struct intel_engine_cs *engine, *locked;
bool ret = false;
/*
* Serialise with __i915_request_submit() so that it sees
* is-banned?, or we know the request is already inflight.
*
* Note that rq->engine is unstable, and so we double
* check that we have acquired the lock on the final engine.
*/
locked = READ_ONCE(rq->engine);
spin_lock_irq(&locked->active.lock);
while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
spin_unlock(&locked->active.lock);
spin_lock(&engine->active.lock);
locked = engine;
spin_lock(&locked->active.lock);
}
engine = NULL;
if (i915_request_is_active(rq) && rq->fence.error != -EIO)
engine = rq->engine;
if (!i915_request_completed(rq)) {
if (i915_request_is_active(rq) && rq->fence.error != -EIO)
*active = locked;
ret = true;
}
spin_unlock_irq(&locked->active.lock);
return engine;
return ret;
}
static struct intel_engine_cs *active_engine(struct intel_context *ce)
@ -472,17 +479,16 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (!ce->timeline)
return NULL;
mutex_lock(&ce->timeline->mutex);
list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
if (i915_request_completed(rq))
break;
rcu_read_lock();
list_for_each_entry_rcu(rq, &ce->timeline->requests, link) {
if (i915_request_is_active(rq) && i915_request_completed(rq))
continue;
/* Check with the backend if the request is inflight */
engine = __active_engine(rq);
if (engine)
if (__active_engine(rq, &engine))
break;
}
mutex_unlock(&ce->timeline->mutex);
rcu_read_unlock();
return engine;
}
@ -713,6 +719,7 @@ __create_context(struct drm_i915_private *i915)
ctx->i915 = i915;
ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
mutex_init(&ctx->mutex);
INIT_LIST_HEAD(&ctx->link);
spin_lock_init(&ctx->stale.lock);
INIT_LIST_HEAD(&ctx->stale.engines);
@ -740,10 +747,6 @@ __create_context(struct drm_i915_private *i915)
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
spin_lock(&i915->gem.contexts.lock);
list_add_tail(&ctx->link, &i915->gem.contexts.list);
spin_unlock(&i915->gem.contexts.lock);
return ctx;
err_free:
@ -889,7 +892,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
struct intel_timeline *timeline;
timeline = intel_timeline_create(&i915->gt, NULL);
timeline = intel_timeline_create(&i915->gt);
if (IS_ERR(timeline)) {
context_close(ctx);
return ERR_CAST(timeline);
@ -931,6 +934,7 @@ static int gem_context_register(struct i915_gem_context *ctx,
struct drm_i915_file_private *fpriv,
u32 *id)
{
struct drm_i915_private *i915 = ctx->i915;
struct i915_address_space *vm;
int ret;
@ -949,8 +953,16 @@ static int gem_context_register(struct i915_gem_context *ctx,
/* And finally expose ourselves to userspace via the idr */
ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
if (ret)
put_pid(fetch_and_zero(&ctx->pid));
goto err_pid;
spin_lock(&i915->gem.contexts.lock);
list_add_tail(&ctx->link, &i915->gem.contexts.list);
spin_unlock(&i915->gem.contexts.lock);
return 0;
err_pid:
put_pid(fetch_and_zero(&ctx->pid));
return ret;
}
@ -1094,6 +1106,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
static int context_barrier_task(struct i915_gem_context *ctx,
intel_engine_mask_t engines,
bool (*skip)(struct intel_context *ce, void *data),
int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),
int (*emit)(struct i915_request *rq, void *data),
void (*task)(void *data),
void *data)
@ -1101,6 +1114,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
struct context_barrier_task *cb;
struct i915_gem_engines_iter it;
struct i915_gem_engines *e;
struct i915_gem_ww_ctx ww;
struct intel_context *ce;
int err = 0;
@ -1138,10 +1152,21 @@ static int context_barrier_task(struct i915_gem_context *ctx,
if (skip && skip(ce, data))
continue;
rq = intel_context_create_request(ce);
i915_gem_ww_ctx_init(&ww, true);
retry:
err = intel_context_pin_ww(ce, &ww);
if (err)
goto err;
if (pin)
err = pin(ce, &ww, data);
if (err)
goto err_unpin;
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
goto err_unpin;
}
err = 0;
@ -1151,6 +1176,16 @@ static int context_barrier_task(struct i915_gem_context *ctx,
err = i915_active_add_request(&cb->base, rq);
i915_request_add(rq);
err_unpin:
intel_context_unpin(ce);
err:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
if (err)
break;
}
@ -1206,6 +1241,17 @@ static void set_ppgtt_barrier(void *data)
i915_vm_close(old);
}
/*
 * Pin callback handed to context_barrier_task() for a ppGTT switch.
 *
 * @ce:   context whose address space is being updated
 * @ww:   ww acquire context forwarded to the gen6 ppGTT pin
 * @data: opaque barrier data; not used here
 *
 * Returns 0 when nothing has to be pinned, otherwise the result of
 * gen6_ppgtt_pin().
 */
static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data)
{
	struct i915_address_space *vm = ce->vm;

	/* With logical ring contexts there is nothing extra to pin. */
	if (HAS_LOGICAL_RING_CONTEXTS(vm->i915))
		return 0;

	/* ppGTT is not part of the legacy context image */
	return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
}
static int emit_ppgtt_update(struct i915_request *rq, void *data)
{
struct i915_address_space *vm = rq->context->vm;
@ -1262,20 +1308,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
static bool skip_ppgtt_update(struct intel_context *ce, void *data)
{
if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
return true;
if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
return false;
if (!atomic_read(&ce->pin_count))
return true;
/* ppGTT is not part of the legacy context image */
if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
return true;
return false;
return !ce->state;
else
return !atomic_read(&ce->pin_count);
}
static int set_ppgtt(struct drm_i915_file_private *file_priv,
@ -1326,6 +1362,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
*/
err = context_barrier_task(ctx, ALL_ENGINES,
skip_ppgtt_update,
pin_ppgtt_update,
emit_ppgtt_update,
set_ppgtt_barrier,
old);

View file

@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
if (err)
return err;
err = i915_gem_object_lock_interruptible(obj);
err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;
@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
if (err)
return err;
err = i915_gem_object_lock_interruptible(obj);
err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out;

View file

@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
if (!i915_gem_object_is_framebuffer(obj))
return;
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
__i915_gem_object_flush_for_display(obj);
i915_gem_object_unlock(obj);
}
/*
 * Flush @obj for display if it is in use as a framebuffer.
 *
 * Unlike i915_gem_object_flush_if_display(), this variant does not take or
 * release the object lock itself; presumably the caller already holds it
 * (as the _locked suffix suggests).
 */
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	__i915_gem_object_flush_for_display(obj);
}
/**
* Moves a single object to the WC read, and possibly write domain.
* @obj: object to act on
@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
if (ret)
return ret;
ret = i915_gem_object_lock_interruptible(obj);
if (ret)
return ret;
/* Always invalidate stale cachelines */
if (obj->cache_level != cache_level) {
i915_gem_object_set_cache_coherency(obj, cache_level);
obj->cache_dirty = true;
}
i915_gem_object_unlock(obj);
/* The cache-level will be applied when each vma is rebound. */
return i915_gem_object_unbind(obj,
I915_GEM_OBJECT_UNBIND_ACTIVE |
@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
goto out;
}
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
goto out;
ret = i915_gem_object_set_cache_level(obj, level);
i915_gem_object_unlock(obj);
out:
i915_gem_object_put(obj);
@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
unsigned int flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_gem_ww_ctx ww;
struct i915_vma *vma;
int ret;
@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
return ERR_PTR(-EINVAL);
i915_gem_ww_ctx_init(&ww, true);
retry:
ret = i915_gem_object_lock(obj, &ww);
if (ret)
goto err;
/*
* The display engine is not coherent with the LLC cache on gen6. As
* a result, we make sure that the pinning that is about to occur is
@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
HAS_WT(i915) ?
I915_CACHE_WT : I915_CACHE_NONE);
if (ret)
return ERR_PTR(ret);
goto err;
/*
* As the user may map the buffer once pinned in the display plane
@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
vma = ERR_PTR(-ENOSPC);
if ((flags & PIN_MAPPABLE) == 0 &&
(!view || view->type == I915_GGTT_VIEW_NORMAL))
vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
flags |
PIN_MAPPABLE |
PIN_NONBLOCK);
if (IS_ERR(vma))
vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
if (IS_ERR(vma))
return vma;
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
flags | PIN_MAPPABLE |
PIN_NONBLOCK);
if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
alignment, flags);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
}
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
i915_gem_object_flush_if_display(obj);
i915_gem_object_flush_if_display_locked(obj);
err:
if (ret == -EDEADLK) {
ret = i915_gem_ww_ctx_backoff(&ww);
if (!ret)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
if (ret)
return ERR_PTR(ret);
return vma;
}
@ -536,7 +560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
if (err)
goto out;
err = i915_gem_object_lock_interruptible(obj);
err = i915_gem_object_lock_interruptible(obj, NULL);
if (err)
goto out_unpin;
@ -576,19 +600,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
ret = i915_gem_object_lock_interruptible(obj);
if (ret)
return ret;
assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err_unlock;
return ret;
ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_unlock;
return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@ -616,8 +638,6 @@ out:
err_unpin:
i915_gem_object_unpin_pages(obj);
err_unlock:
i915_gem_object_unlock(obj);
return ret;
}
@ -630,20 +650,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
ret = i915_gem_object_lock_interruptible(obj);
if (ret)
return ret;
assert_object_held(obj);
ret = i915_gem_object_wait(obj,
I915_WAIT_INTERRUPTIBLE |
I915_WAIT_ALL,
MAX_SCHEDULE_TIMEOUT);
if (ret)
goto err_unlock;
return ret;
ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_unlock;
return ret;
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
@ -680,7 +698,5 @@ out:
err_unpin:
i915_gem_object_unpin_pages(obj);
err_unlock:
i915_gem_object_unlock(obj);
return ret;
}

File diff suppressed because it is too large Load diff

View file

@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct intel_runtime_pm *rpm = &i915->runtime_pm;
struct i915_ggtt *ggtt = &i915->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
int srcu;
int ret;
/* Sanity check that we allow writing into this object */
if (i915_gem_object_is_readonly(obj) && write)
return VM_FAULT_SIGBUS;
/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
trace_i915_gem_object_fault(obj, page_offset, true, write);
ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err;
wakeref = intel_runtime_pm_get(rpm);
ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
i915_gem_ww_ctx_init(&ww, true);
retry:
ret = i915_gem_object_lock(obj, &ww);
if (ret)
goto err_rpm;
/* Sanity check that we allow writing into this object */
if (i915_gem_object_is_readonly(obj) && write) {
ret = -EFAULT;
goto err_rpm;
}
ret = i915_gem_object_pin_pages(obj);
if (ret)
goto err_rpm;
ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
if (ret)
goto err_pages;
/* Now pin it into the GTT as needed */
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
PIN_MAPPABLE |
PIN_NONBLOCK /* NOWARN */ |
PIN_NOEVICT);
if (IS_ERR(vma)) {
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
PIN_MAPPABLE |
PIN_NONBLOCK /* NOWARN */ |
PIN_NOEVICT);
if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
/* Use a partial view if it is bigger than available space */
struct i915_ggtt_view view =
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
* all hope that the hardware is able to track future writes.
*/
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
if (IS_ERR(vma)) {
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
flags = PIN_MAPPABLE;
view.type = I915_GGTT_VIEW_PARTIAL;
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
}
/* The entire mappable GGTT is pinned? Unexpected! */
@ -389,10 +398,16 @@ err_unpin:
__i915_vma_unpin(vma);
err_reset:
intel_gt_reset_unlock(ggtt->vm.gt, srcu);
err_rpm:
intel_runtime_pm_put(rpm, wakeref);
err_pages:
i915_gem_object_unpin_pages(obj);
err:
err_rpm:
if (ret == -EDEADLK) {
ret = i915_gem_ww_ctx_backoff(&ww);
if (!ret)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
intel_runtime_pm_put(rpm, wakeref);
return i915_error_to_vmf_fault(ret);
}

View file

@ -110,9 +110,39 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww,
bool intr)
{
dma_resv_lock(obj->base.resv, NULL);
int ret;
if (intr)
ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
else
ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
if (!ret && ww)
list_add_tail(&obj->obj_link, &ww->obj_list);
if (ret == -EALREADY)
ret = 0;
if (ret == -EDEADLK)
ww->contended = obj;
return ret;
}
static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww)
{
return __i915_gem_object_lock(obj, ww, ww && ww->intr);
}
/*
 * Lock @obj interruptibly, optionally as part of the ww acquire context @ww.
 *
 * Always requests an interruptible lock from __i915_gem_object_lock(). Warns
 * if a context is supplied that was not itself set up as interruptible,
 * since the two would disagree. Returns whatever __i915_gem_object_lock()
 * returns.
 */
static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww)
{
WARN_ON(ww && !ww->intr);
return __i915_gem_object_lock(obj, ww, true);
}
static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
@ -120,12 +150,6 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
return dma_resv_trylock(obj->base.resv);
}
static inline int
i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
{
return dma_resv_lock_interruptible(obj->base.resv, NULL);
}
/* Release the reservation lock of @obj (obj->base.resv). */
static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
{
dma_resv_unlock(obj->base.resv);
}
@ -412,7 +436,6 @@ static inline void
i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
{
i915_gem_object_unpin_pages(obj);
i915_gem_object_unlock(obj);
}
static inline struct intel_engine_cs *
@ -435,6 +458,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
unsigned int cache_level);
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
int __must_check
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);

View file

@ -14,6 +14,7 @@
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
struct i915_vma *vma,
struct i915_gem_ww_ctx *ww,
u32 value)
{
struct drm_i915_private *i915 = ce->vm->i915;
@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
goto out_pm;
}
err = i915_gem_object_lock(pool->obj, ww);
if (err)
goto out_put;
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_put;
}
err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
if (unlikely(err))
goto out_put;
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
goto out_put;
goto out_unpin;
}
rem = vma->size;
@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
intel_gt_chipset_flush(ce->vm->gt);
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_put;
}
err = i915_vma_pin(batch, 0, 0, PIN_USER);
if (unlikely(err))
goto out_put;
batch->private = pool;
return batch;
out_unpin:
i915_vma_unpin(batch);
out_put:
intel_gt_buffer_pool_put(pool);
out_pm:
@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
int err;
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, 0);
i915_vma_unlock(vma);
if (unlikely(err))
return err;
@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
struct intel_context *ce,
u32 value)
{
struct i915_gem_ww_ctx ww;
struct i915_request *rq;
struct i915_vma *batch;
struct i915_vma *vma;
@ -150,17 +156,28 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
return PTR_ERR(vma);
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (unlikely(err))
return err;
i915_gem_ww_ctx_init(&ww, true);
intel_engine_pm_get(ce->engine);
retry:
err = i915_gem_object_lock(obj, &ww);
if (err)
goto out;
batch = intel_emit_vma_fill_blt(ce, vma, value);
err = intel_context_pin_ww(ce, &ww);
if (err)
goto out;
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
goto out_ctx;
batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_unpin;
goto out_vma;
}
rq = intel_context_create_request(ce);
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
if (unlikely(err))
goto out_request;
i915_vma_lock(vma);
err = move_obj_to_gpu(vma->obj, rq, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (unlikely(err))
goto out_request;
@ -193,8 +208,18 @@ out_request:
i915_request_add(rq);
out_batch:
intel_emit_vma_release(ce, batch);
out_unpin:
out_vma:
i915_vma_unpin(vma);
out_ctx:
intel_context_unpin(ce);
out:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
intel_engine_pm_put(ce->engine);
return err;
}
@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
}
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
struct i915_gem_ww_ctx *ww,
struct i915_vma *src,
struct i915_vma *dst)
{
@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
goto out_pm;
}
err = i915_gem_object_lock(pool->obj, ww);
if (err)
goto out_put;
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_put;
}
err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
if (unlikely(err))
goto out_put;
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
goto out_put;
goto out_unpin;
}
rem = src->size;
@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
i915_gem_object_unpin_map(pool->obj);
intel_gt_chipset_flush(ce->vm->gt);
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_put;
}
err = i915_vma_pin(batch, 0, 0, PIN_USER);
if (unlikely(err))
goto out_put;
batch->private = pool;
return batch;
out_unpin:
i915_vma_unpin(batch);
out_put:
intel_gt_buffer_pool_put(pool);
out_pm:
@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
struct drm_i915_gem_object *dst,
struct intel_context *ce)
{
struct drm_gem_object *objs[] = { &src->base, &dst->base };
struct i915_address_space *vm = ce->vm;
struct i915_vma *vma[2], *batch;
struct ww_acquire_ctx acquire;
struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err, i;
@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
if (IS_ERR(vma[0]))
return PTR_ERR(vma[0]);
err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
if (unlikely(err))
return err;
vma[1] = i915_vma_instance(dst, vm, NULL);
if (IS_ERR(vma[1]))
goto out_unpin_src;
return PTR_ERR(vma);
err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
i915_gem_ww_ctx_init(&ww, true);
intel_engine_pm_get(ce->engine);
retry:
err = i915_gem_object_lock(src, &ww);
if (!err)
err = i915_gem_object_lock(dst, &ww);
if (!err)
err = intel_context_pin_ww(ce, &ww);
if (err)
goto out;
err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
if (err)
goto out_ctx;
err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
if (unlikely(err))
goto out_unpin_src;
batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_unpin_dst;
}
rq = intel_context_create_request(ce);
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
if (unlikely(err))
goto out_request;
err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
if (unlikely(err))
goto out_request;
for (i = 0; i < ARRAY_SIZE(vma); i++) {
err = move_obj_to_gpu(vma[i]->obj, rq, i);
if (unlikely(err))
goto out_unlock;
goto out_request;
}
for (i = 0; i < ARRAY_SIZE(vma); i++) {
@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
err = i915_vma_move_to_active(vma[i], rq, flags);
if (unlikely(err))
goto out_unlock;
goto out_request;
}
if (rq->engine->emit_init_breadcrumb) {
err = rq->engine->emit_init_breadcrumb(rq);
if (unlikely(err))
goto out_unlock;
goto out_request;
}
err = rq->engine->emit_bb_start(rq,
batch->node.start, batch->node.size,
0);
out_unlock:
drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
if (unlikely(err))
i915_request_set_error_once(rq, err);
@ -400,6 +436,16 @@ out_unpin_dst:
i915_vma_unpin(vma[1]);
out_unpin_src:
i915_vma_unpin(vma[0]);
out_ctx:
intel_context_unpin(ce);
out:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
intel_engine_pm_put(ce->engine);
return err;
}

View file

@ -13,12 +13,15 @@
#include "i915_vma.h"
struct drm_i915_gem_object;
struct i915_gem_ww_ctx;
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
struct i915_vma *vma,
struct i915_gem_ww_ctx *ww,
u32 value);
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
struct i915_gem_ww_ctx *ww,
struct i915_vma *src,
struct i915_vma *dst);

View file

@ -123,6 +123,15 @@ struct drm_i915_gem_object {
struct list_head lut_list;
spinlock_t lut_lock; /* guards lut_list */
/**
* @obj_link: Link into @i915_gem_ww_ctx.obj_list
*
* When we lock this object through i915_gem_object_lock() with a
* context, we add it to the list to ensure we can unlock everything
* when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
*/
struct list_head obj_link;
/** Stolen memory for this object, instead of being backed by shmem. */
struct drm_mm_node *stolen;
union {
@ -282,6 +291,7 @@ struct drm_i915_gem_object {
} userptr;
unsigned long scratch;
u64 encode;
void *gvt_info;
};

View file

@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
drm_WARN_ON(&i915->drm,
i915_gem_object_set_to_gtt_domain(obj, false));
i915_gem_object_unlock(obj);

View file

@ -9,6 +9,7 @@
#include <drm/drm_file.h>
#include "i915_drv.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
/*
 * i915_gem_throttle_ioctl - wait for this file's older GPU requests.
 *
 * The cutoff is recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES. Two
 * strategies are visible below: one scans file_priv->mm.request_list under
 * file_priv->mm.lock and waits on a single target; the other iterates
 * file_priv->context_xa, and for each engine timeline waits on one request
 * that is not newer than the cutoff.
 *
 * Returns a negative error from the wait, otherwise 0.
 *
 * NOTE(review): this span is a flattened diff hunk, so lines from both
 * versions of the function are interleaved; presumably the request_list form
 * is the removed one and the context_xa form the added one - confirm against
 * the real file. The statement that assigns ret before the first "if (ret)"
 * is not visible here (hidden at the hunk boundary).
 */
@ -35,9 +36,10 @@ int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_file_private *file_priv = file->driver_priv;
unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct i915_request *request, *target = NULL;
struct i915_gem_context *ctx;
unsigned long idx;
long ret;
/* ABI: return -EIO if already wedged */
@ -45,27 +47,54 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
if (ret)
return ret;
spin_lock(&file_priv->mm.lock);
list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
if (time_after_eq(request->emitted_jiffies, recent_enough))
break;
rcu_read_lock();
xa_for_each(&file_priv->context_xa, idx, ctx) {
struct i915_gem_engines_iter it;
struct intel_context *ce;
if (target && xchg(&target->file_priv, NULL))
list_del(&target->client_link);
/* Skip a context whose reference count has already reached zero. */
if (!kref_get_unless_zero(&ctx->ref))
continue;
/* The context reference taken above is held while outside the RCU section. */
rcu_read_unlock();
target = request;
for_each_gem_engine(ce,
i915_gem_context_lock_engines(ctx),
it) {
struct i915_request *rq, *target = NULL;
if (!ce->timeline)
continue;
mutex_lock(&ce->timeline->mutex);
/* Scan the timeline's requests in reverse list order. */
list_for_each_entry_reverse(rq,
&ce->timeline->requests,
link) {
/* Stop scanning at the first request that has completed. */
if (i915_request_completed(rq))
break;
/* Pass over requests emitted after the cutoff. */
if (time_after(rq->emitted_jiffies,
recent_enough))
continue;
/* Take a reference so the wait can happen after the mutex is dropped. */
target = i915_request_get(rq);
break;
}
mutex_unlock(&ce->timeline->mutex);
if (!target)
continue;
ret = i915_request_wait(target,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
i915_request_put(target);
/*
 * NOTE(review): this break leaves only the engine loop. Nothing visible
 * stops the enclosing context loop, so a later wait may overwrite a
 * negative ret - confirm whether that is intended.
 */
if (ret < 0)
break;
}
i915_gem_context_unlock_engines(ctx);
i915_gem_context_put(ctx);
/* Re-enter the RCU read section before advancing to the next context. */
rcu_read_lock();
}
if (target)
i915_request_get(target);
spin_unlock(&file_priv->mm.lock);
if (!target)
return 0;
ret = i915_request_wait(target,
I915_WAIT_INTERRUPTIBLE,
MAX_SCHEDULE_TIMEOUT);
i915_request_put(target);
rcu_read_unlock();
/* Non-negative wait results are collapsed to 0. */
return ret < 0 ? ret : 0;
}

View file

@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
* whilst executing a fenced command for an untiled object.
*/
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
if (i915_gem_object_is_framebuffer(obj)) {
i915_gem_object_unlock(obj);
return -EBUSY;

View file

@ -393,7 +393,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
*/
for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
unsigned int combination = 0;
unsigned int combination = SZ_4K; /* Required for ppGTT */
for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
if (i & BIT(j))
@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
{
int err;
i915_gem_object_lock(vma->obj);
i915_gem_object_lock(vma->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
i915_gem_object_unlock(vma->obj);
if (err)
@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
unsigned long n;
int err;
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
return err;
goto err_unlock;
for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
}
i915_gem_object_finish_access(obj);
err_unlock:
i915_gem_object_unlock(obj);
return err;
}

View file

@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
if (err)
goto err_unpin;
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)

View file

@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
u32 *cpu;
int err;
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
if (err)
return err;
goto out;
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
kunmap_atomic(map);
i915_gem_object_finish_access(ctx->obj);
return 0;
out:
i915_gem_object_unlock(ctx->obj);
return err;
}
static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
u32 *cpu;
int err;
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
if (err)
return err;
goto out;
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
map = kmap_atomic(page);
@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
kunmap_atomic(map);
i915_gem_object_finish_access(ctx->obj);
return 0;
out:
i915_gem_object_unlock(ctx->obj);
return err;
}
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
@ -82,7 +88,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
u32 __iomem *map;
int err = 0;
i915_gem_object_lock(ctx->obj);
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
i915_gem_object_unlock(ctx->obj);
if (err)
@ -115,7 +121,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
u32 __iomem *map;
int err = 0;
i915_gem_object_lock(ctx->obj);
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
i915_gem_object_unlock(ctx->obj);
if (err)
@ -147,7 +153,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
u32 *map;
int err;
i915_gem_object_lock(ctx->obj);
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
i915_gem_object_unlock(ctx->obj);
if (err)
@ -170,7 +176,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
u32 *map;
int err;
i915_gem_object_lock(ctx->obj);
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
i915_gem_object_unlock(ctx->obj);
if (err)
@ -193,27 +199,27 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
u32 *cs;
int err;
i915_gem_object_lock(ctx->obj);
i915_gem_object_lock(ctx->obj, NULL);
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
i915_gem_object_unlock(ctx->obj);
if (err)
return err;
goto out_unlock;
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
if (IS_ERR(vma))
return PTR_ERR(vma);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto out_unlock;
}
rq = intel_engine_create_kernel_request(ctx->engine);
if (IS_ERR(rq)) {
i915_vma_unpin(vma);
return PTR_ERR(rq);
err = PTR_ERR(rq);
goto out_unpin;
}
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs)) {
i915_request_add(rq);
i915_vma_unpin(vma);
return PTR_ERR(cs);
err = PTR_ERR(cs);
goto out_rq;
}
if (INTEL_GEN(ctx->engine->i915) >= 8) {
@ -234,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
}
intel_ring_advance(rq, cs);
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
i915_vma_unpin(vma);
out_rq:
i915_request_add(rq);
out_unpin:
i915_vma_unpin(vma);
out_unlock:
i915_gem_object_unlock(ctx->obj);
return err;
}

View file

@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
unsigned int n, m, need_flush;
int err;
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_write(obj, &need_flush);
if (err)
return err;
goto out;
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
i915_gem_object_finish_access(obj);
obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
obj->write_domain = 0;
return 0;
out:
i915_gem_object_unlock(obj);
return err;
}
static noinline int cpu_check(struct drm_i915_gem_object *obj,
@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
unsigned int n, m, needs_flush;
int err;
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_prepare_read(obj, &needs_flush);
if (err)
return err;
goto out_unlock;
for (n = 0; n < real_page_count(obj); n++) {
u32 *map;
@ -527,6 +531,8 @@ out_unmap:
}
i915_gem_object_finish_access(obj);
out_unlock:
i915_gem_object_unlock(obj);
return err;
}
@ -887,24 +893,15 @@ out_file:
return err;
}
/*
 * rpcs_query_batch - write a small batch that stores GEN8_R_PWR_CLK_STATE
 * to the address vma->node.start and ends with MI_BATCH_BUFFER_END.
 *
 * Two forms are visible: one that creates an internal object itself, pins a
 * vma for it and returns that vma (or ERR_PTR), and one that fills the
 * caller-supplied @rpcs object and returns 0 or a negative error from
 * i915_gem_object_pin_map().
 *
 * NOTE(review): this span is a flattened diff hunk with both forms
 * interleaved; presumably the vma-returning form is the removed one - confirm
 * against the real file. The line between the register offset and
 * upper_32_bits() is hidden at the "@ -912" hunk boundary.
 */
static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
{
struct drm_i915_gem_object *obj;
u32 *cmd;
int err;
if (INTEL_GEN(vma->vm->i915) < 8)
return ERR_PTR(-EINVAL);
GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8);
obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
if (IS_ERR(obj))
return ERR_CAST(obj);
cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
goto err;
}
cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
if (IS_ERR(cmd))
return PTR_ERR(cmd);
*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
@ -912,26 +909,12 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
*cmd++ = upper_32_bits(vma->node.start);
*cmd = MI_BATCH_BUFFER_END;
/* Flush the first 64 bytes of the mapping, then drop the CPU mapping. */
__i915_gem_object_flush_map(obj, 0, 64);
i915_gem_object_unpin_map(obj);
__i915_gem_object_flush_map(rpcs, 0, 64);
i915_gem_object_unpin_map(rpcs);
intel_gt_chipset_flush(vma->vm->gt);
vma = i915_vma_instance(obj, vma->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
goto err;
return vma;
err:
i915_gem_object_put(obj);
return ERR_PTR(err);
return 0;
}
static int
@ -939,52 +922,68 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
struct intel_context *ce,
struct i915_request **rq_out)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_request *rq;
struct i915_gem_ww_ctx ww;
struct i915_vma *batch;
struct i915_vma *vma;
struct drm_i915_gem_object *rpcs;
int err;
GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
if (INTEL_GEN(i915) < 8)
return -EINVAL;
vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
i915_gem_object_lock(obj);
err = i915_gem_object_set_to_gtt_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
return err;
rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(rpcs))
return PTR_ERR(rpcs);
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
return err;
batch = rpcs_query_batch(vma);
batch = i915_vma_instance(rpcs, ce->vm, NULL);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto err_vma;
goto err_put;
}
i915_gem_ww_ctx_init(&ww, false);
retry:
err = i915_gem_object_lock(obj, &ww);
if (!err)
err = i915_gem_object_lock(rpcs, &ww);
if (!err)
err = i915_gem_object_set_to_gtt_domain(obj, false);
if (!err)
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
goto err_put;
err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
if (err)
goto err_vma;
err = rpcs_query_batch(rpcs, vma);
if (err)
goto err_batch;
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_batch;
}
i915_vma_lock(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, rq, 0);
i915_vma_unlock(batch);
if (err)
goto skip_request;
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (err)
goto skip_request;
@ -1000,23 +999,24 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
if (err)
goto skip_request;
i915_vma_unpin_and_release(&batch, 0);
i915_vma_unpin(vma);
*rq_out = i915_request_get(rq);
i915_request_add(rq);
return 0;
skip_request:
i915_request_set_error_once(rq, err);
if (err)
i915_request_set_error_once(rq, err);
i915_request_add(rq);
err_batch:
i915_vma_unpin_and_release(&batch, 0);
i915_vma_unpin(batch);
err_vma:
i915_vma_unpin(vma);
err_put:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
i915_gem_object_put(rpcs);
return err;
}
@ -1709,7 +1709,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
i915_request_add(rq);
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (err)
@ -1748,7 +1748,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
if (!vm)
return -ENODEV;
page = vm->scratch[0].base.page;
page = __px_page(vm->scratch[0]);
if (!page) {
pr_err("No scratch page!\n");
return -EINVAL;
@ -1914,8 +1914,8 @@ static int mock_context_barrier(void *arg)
return -ENOMEM;
counter = 0;
err = context_barrier_task(ctx, 0,
NULL, NULL, mock_barrier_task, &counter);
err = context_barrier_task(ctx, 0, NULL, NULL, NULL,
mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
@ -1927,11 +1927,8 @@ static int mock_context_barrier(void *arg)
}
counter = 0;
err = context_barrier_task(ctx, ALL_ENGINES,
skip_unused_engines,
NULL,
mock_barrier_task,
&counter);
err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
NULL, NULL, mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;
@ -1951,8 +1948,8 @@ static int mock_context_barrier(void *arg)
counter = 0;
context_barrier_inject_fault = BIT(RCS0);
err = context_barrier_task(ctx, ALL_ENGINES,
NULL, NULL, mock_barrier_task, &counter);
err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL,
mock_barrier_task, &counter);
context_barrier_inject_fault = 0;
if (err == -ENXIO)
err = 0;
@ -1966,11 +1963,8 @@ static int mock_context_barrier(void *arg)
goto out;
counter = 0;
err = context_barrier_task(ctx, ALL_ENGINES,
skip_unused_engines,
NULL,
mock_barrier_task,
&counter);
err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
NULL, NULL, mock_barrier_task, &counter);
if (err) {
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
goto out;

View file

@ -32,46 +32,39 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
if (IS_ERR(vma))
return PTR_ERR(vma);
err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
err = i915_gem_object_lock(obj, &eb->ww);
if (err)
return err;
err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH);
if (err)
return err;
/* 8-Byte aligned */
if (!__reloc_entry_gpu(eb, vma,
offsets[0] * sizeof(u32),
0)) {
err = -EIO;
goto unpin_vma;
}
err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
if (err <= 0)
goto reloc_err;
/* !8-Byte aligned */
if (!__reloc_entry_gpu(eb, vma,
offsets[1] * sizeof(u32),
1)) {
err = -EIO;
goto unpin_vma;
}
err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
if (err <= 0)
goto reloc_err;
/* Skip to the end of the cmd page */
i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
i = PAGE_SIZE / sizeof(u32) - 1;
i -= eb->reloc_cache.rq_size;
memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
MI_NOOP, i);
eb->reloc_cache.rq_size += i;
/* Force batch chaining */
if (!__reloc_entry_gpu(eb, vma,
offsets[2] * sizeof(u32),
2)) {
err = -EIO;
goto unpin_vma;
}
/* Force next batch */
err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
if (err <= 0)
goto reloc_err;
GEM_BUG_ON(!eb->reloc_cache.rq);
rq = i915_request_get(eb->reloc_cache.rq);
err = reloc_gpu_flush(&eb->reloc_cache);
if (err)
goto put_rq;
reloc_gpu_flush(eb, &eb->reloc_cache);
GEM_BUG_ON(eb->reloc_cache.rq);
err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
@ -103,6 +96,11 @@ put_rq:
unpin_vma:
i915_vma_unpin(vma);
return err;
reloc_err:
if (!err)
err = -EIO;
goto unpin_vma;
}
static int igt_gpu_reloc(void *arg)
@ -124,6 +122,8 @@ static int igt_gpu_reloc(void *arg)
goto err_scratch;
}
intel_gt_pm_get(&eb.i915->gt);
for_each_uabi_engine(eb.engine, eb.i915) {
reloc_cache_init(&eb.reloc_cache, eb.i915);
memset(map, POISON_INUSE, 4096);
@ -134,15 +134,29 @@ static int igt_gpu_reloc(void *arg)
err = PTR_ERR(eb.context);
goto err_pm;
}
eb.reloc_pool = NULL;
eb.reloc_context = NULL;
err = intel_context_pin(eb.context);
if (err)
goto err_put;
i915_gem_ww_ctx_init(&eb.ww, false);
retry:
err = intel_context_pin_ww(eb.context, &eb.ww);
if (!err) {
err = __igt_gpu_reloc(&eb, scratch);
err = __igt_gpu_reloc(&eb, scratch);
intel_context_unpin(eb.context);
}
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&eb.ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&eb.ww);
if (eb.reloc_pool)
intel_gt_buffer_pool_put(eb.reloc_pool);
if (eb.reloc_context)
intel_context_put(eb.reloc_context);
intel_context_unpin(eb.context);
err_put:
intel_context_put(eb.context);
err_pm:
intel_engine_pm_put(eb.engine);
@ -153,6 +167,7 @@ err_pm:
if (igt_flush_test(eb.i915))
err = -EIO;
intel_gt_pm_put(&eb.i915->gt);
err_scratch:
i915_gem_object_put(scratch);
return err;

View file

@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {
@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
struct i915_gem_ww_ctx ww;
int err;
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma))
return PTR_ERR(vma);
err = i915_vma_pin(vma, 0, 0, PIN_USER);
i915_gem_ww_ctx_init(&ww, false);
retry:
err = i915_gem_object_lock(obj, &ww);
if (!err)
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
return err;
goto err;
rq = intel_engine_create_kernel_request(engine);
if (IS_ERR(rq)) {
i915_vma_unpin(vma);
return PTR_ERR(rq);
err = PTR_ERR(rq);
goto err_unpin;
}
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq,
EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
i915_request_add(rq);
err_unpin:
i915_vma_unpin(vma);
err:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
if (err)
return err;
}
@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
for_each_uabi_engine(engine, i915) {
struct i915_request *rq;
struct i915_vma *vma;
struct i915_gem_ww_ctx ww;
vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
if (IS_ERR(vma)) {
@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unmap;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
i915_gem_ww_ctx_init(&ww, false);
retry:
err = i915_gem_object_lock(obj, &ww);
if (!err)
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
if (err)
goto out_unmap;
goto out_ww;
rq = i915_request_create(engine->kernel_context);
if (IS_ERR(rq)) {
@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
goto out_unpin;
}
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, 0);
i915_vma_unlock(vma);
err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
i915_request_get(rq);
@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
out_unpin:
i915_vma_unpin(vma);
out_ww:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
if (err)
goto out_unmap;
}

View file

@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
}
/* Make the object dirty so that put_pages must copy the data back */
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err) {

View file

/*
 * gen6_write_pde - write page-directory entry @pde through the
 * ppgtt->pd_addr mapping, marking it GEN6_PDE_VALID.
 *
 * NOTE(review): this span is a flattened diff hunk. The two consecutive
 * iowrite32( openers are presumably the removed (px_dma(pt)) and added (addr)
 * forms of one statement - confirm against the real file.
 */
@ -16,8 +16,10 @@ static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
const unsigned int pde,
const struct i915_page_table *pt)
{
/* A NULL @pt selects the DMA address of scratch[1] instead. */
dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);
/* Caller needs to make sure the write completes if necessary */
iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
ppgtt->pd_addr + pde);
}
@ -79,7 +81,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
{
struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
const gen6_pte_t scratch_pte = vm->scratch[0].encode;
const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
unsigned int pde = first_entry / GEN6_PTES;
unsigned int pte = first_entry % GEN6_PTES;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
@ -90,8 +92,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
const unsigned int count = min(num_entries, GEN6_PTES - pte);
gen6_pte_t *vaddr;
GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
num_entries -= count;
GEM_BUG_ON(count > atomic_read(&pt->used));
@ -127,7 +127,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma);
gen6_pte_t *vaddr;
GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
GEM_BUG_ON(!pd->entry[act_pt]);
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
@ -177,39 +177,36 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
mutex_unlock(&ppgtt->flush);
}
static int gen6_alloc_va_range(struct i915_address_space *vm,
u64 start, u64 length)
static void gen6_alloc_va_range(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
u64 start, u64 length)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
struct i915_page_directory * const pd = ppgtt->base.pd;
struct i915_page_table *pt, *alloc = NULL;
struct i915_page_table *pt;
bool flush = false;
u64 from = start;
unsigned int pde;
int ret = 0;
spin_lock(&pd->lock);
gen6_for_each_pde(pt, pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
if (px_base(pt) == px_base(&vm->scratch[1])) {
if (!pt) {
spin_unlock(&pd->lock);
pt = fetch_and_zero(&alloc);
if (!pt)
pt = alloc_pt(vm);
if (IS_ERR(pt)) {
ret = PTR_ERR(pt);
goto unwind_out;
}
pt = stash->pt[0];
__i915_gem_object_pin_pages(pt->base);
i915_gem_object_make_unshrinkable(pt->base);
fill32_px(pt, vm->scratch[0].encode);
fill32_px(pt, vm->scratch[0]->encode);
spin_lock(&pd->lock);
if (pd->entry[pde] == &vm->scratch[1]) {
if (!pd->entry[pde]) {
stash->pt[0] = pt->stash;
atomic_set(&pt->used, 0);
pd->entry[pde] = pt;
} else {
alloc = pt;
pt = pd->entry[pde];
}
@ -226,38 +223,32 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
gen6_flush_pd(ppgtt, from, start);
}
goto out;
unwind_out:
gen6_ppgtt_clear_range(vm, from, start - from);
out:
if (alloc)
free_px(vm, alloc);
return ret;
}
/*
 * gen6_ppgtt_init_scratch - set up the scratch page (scratch[0]) and the
 * scratch page table (scratch[1]) for a gen6 ppGTT.
 *
 * scratch[0] gets a read-only, uncached PTE encoding; scratch[1] is then
 * filled with that encoding. Returns 0 on success or a negative error code.
 *
 * NOTE(review): this span is a flattened diff hunk. Lines using
 * "vm->scratch[N]." / "&vm->scratch[N]" and lines using "vm->scratch[N]->"
 * are two versions of the same statements, presumably removed and added
 * respectively - confirm against the real file.
 */
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
struct i915_address_space * const vm = &ppgtt->base.vm;
struct i915_page_directory * const pd = ppgtt->base.pd;
int ret;
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
ret = setup_scratch_page(vm);
if (ret)
return ret;
vm->scratch[0].encode =
vm->pte_encode(px_dma(&vm->scratch[0]),
vm->scratch[0]->encode =
vm->pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_NONE, PTE_READ_ONLY);
if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
cleanup_scratch_page(vm);
return -ENOMEM;
/*
 * NOTE(review): the two error returns below show no release of scratch[0],
 * whereas the setup_page_dma() failure path above calls
 * cleanup_scratch_page(). Whether the caller frees scratch[0] on this
 * failure is not visible here - verify to rule out a leak.
 */
vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(vm->scratch[1]))
return PTR_ERR(vm->scratch[1]);
ret = pin_pt_dma(vm, vm->scratch[1]);
if (ret) {
i915_gem_object_put(vm->scratch[1]);
return ret;
}
fill32_px(&vm->scratch[1], vm->scratch[0].encode);
/* Points every directory entry at the scratch[1] page table. */
memset_p(pd->entry, &vm->scratch[1], I915_PDES);
fill32_px(vm->scratch[1], vm->scratch[0]->encode);
return 0;
}
@ -265,14 +256,12 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
/*
 * gen6_ppgtt_free_pd - free the page tables referenced by the ppGTT's page
 * directory, walking every entry with gen6_for_all_pdes().
 *
 * NOTE(review): this span is a flattened diff hunk. Two filters are
 * interleaved: "px_base(pt) != scratch" with free_px(), and "pt" (non-NULL)
 * with free_pt(); presumably the scratch comparison is the removed form -
 * confirm against the real file.
 */
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
struct i915_page_directory * const pd = ppgtt->base.pd;
struct i915_page_dma * const scratch =
px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
u32 pde;
gen6_for_all_pdes(pt, pd, pde)
if (px_base(pt) != scratch)
free_px(&ppgtt->base.vm, pt);
if (pt)
free_pt(&ppgtt->base.vm, pt);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@ -286,7 +275,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
mutex_destroy(&ppgtt->flush);
mutex_destroy(&ppgtt->pin_mutex);
kfree(ppgtt->base.pd);
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}
static int pd_vma_set_pages(struct i915_vma *vma)
@ -302,28 +292,26 @@ static void pd_vma_clear_pages(struct i915_vma *vma)
vma->pages = NULL;
}
/*
 * pd_vma_bind - record where the gen6 page directory sits in the GGTT and
 * flush the directory.
 *
 * Derives the entry index from i915_ggtt_offset(vma), stores the byte offset
 * in the page directory and in ppgtt->pp_dir (shifted left by 10), points
 * ppgtt->pd_addr into ggtt->gsm, then calls gen6_flush_pd() over
 * [0, vm.total). @cache_level and @unused are not read in the visible body.
 *
 * NOTE(review): this span is a flattened diff hunk. Both an int-returning
 * signature and a void signature taking an extra @stash are present, and
 * "return 0;" presumably belongs only to the int form - confirm against the
 * real file.
 */
static int pd_vma_bind(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 unused)
static void pd_vma_bind(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
struct gen6_ppgtt *ppgtt = vma->private;
/* GGTT offset expressed as a count of I915_GTT_PAGE_SIZE pages. */
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
return 0;
}
static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
{
struct gen6_ppgtt *ppgtt = vma->private;
struct i915_page_directory * const pd = ppgtt->base.pd;
struct i915_page_dma * const scratch =
px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
unsigned int pde;
@ -332,11 +320,11 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
/* Free all no longer used page tables */
gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
if (px_base(pt) == scratch || atomic_read(&pt->used))
if (!pt || atomic_read(&pt->used))
continue;
free_px(&ppgtt->base.vm, pt);
pd->entry[pde] = scratch;
free_pt(&ppgtt->base.vm, pt);
pd->entry[pde] = NULL;
}
ppgtt->scan_for_unused_pt = false;
@ -380,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
return vma;
}
int gen6_ppgtt_pin(struct i915_ppgtt *base)
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
int err;
@ -406,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
*/
err = 0;
if (!atomic_read(&ppgtt->pin_count))
err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
if (!err)
atomic_inc(&ppgtt->pin_count);
mutex_unlock(&ppgtt->pin_mutex);
@ -448,6 +436,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
mutex_init(&ppgtt->pin_mutex);
ppgtt_init(&ppgtt->base, gt);
ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
ppgtt->base.vm.top = 1;
ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
@ -456,9 +445,10 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
ppgtt->base.pd = __alloc_pd(I915_PDES);
if (!ppgtt->base.pd) {
err = -ENOMEM;
goto err_free;
@ -479,7 +469,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
err_scratch:
free_scratch(&ppgtt->base.vm);
err_pd:
kfree(ppgtt->base.pd);
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
err_free:
mutex_destroy(&ppgtt->pin_mutex);
kfree(ppgtt);

View file

@ -8,12 +8,15 @@
#include "intel_gtt.h"
struct i915_gem_ww_ctx;
struct gen6_ppgtt {
struct i915_ppgtt base;
struct mutex flush;
struct i915_vma *vma;
gen6_pte_t __iomem *pd_addr;
u32 pp_dir;
atomic_t pin_count;
struct mutex pin_mutex;
@ -66,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
(pt = i915_pt_entry(pd, iter), true); \
++iter)
int gen6_ppgtt_pin(struct i915_ppgtt *base);
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
void gen6_ppgtt_unpin(struct i915_ppgtt *base);
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
void gen6_ppgtt_enable(struct intel_gt *gt);

View file

@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
} while (pde++, --count);
}
free_px(vm, pd);
free_px(vm, &pd->pt, lvl);
}
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@ -199,7 +199,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
struct i915_page_directory * const pd,
u64 start, const u64 end, int lvl)
{
const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
unsigned int idx, len;
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
@ -239,7 +239,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
vaddr = kmap_atomic_px(pt);
memset64(vaddr + gen8_pd_index(start, 0),
vm->scratch[0].encode,
vm->scratch[0]->encode,
count);
kunmap_atomic(vaddr);
@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
}
if (release_pd_entry(pd, idx, pt, scratch))
free_px(vm, pt);
free_px(vm, pt, lvl);
} while (idx++, --len);
return start;
@ -269,14 +269,12 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
start, start + length, vm->top);
}
static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
struct i915_page_directory * const pd,
u64 * const start, const u64 end, int lvl)
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
struct i915_vm_pt_stash *stash,
struct i915_page_directory * const pd,
u64 * const start, const u64 end, int lvl)
{
const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
struct i915_page_table *alloc = NULL;
unsigned int idx, len;
int ret = 0;
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
@ -297,49 +295,31 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
__func__, vm, lvl + 1, idx);
pt = fetch_and_zero(&alloc);
if (lvl) {
if (!pt) {
pt = &alloc_pd(vm)->pt;
if (IS_ERR(pt)) {
ret = PTR_ERR(pt);
goto out;
}
}
pt = stash->pt[!!lvl];
__i915_gem_object_pin_pages(pt->base);
i915_gem_object_make_unshrinkable(pt->base);
fill_px(pt, vm->scratch[lvl].encode);
} else {
if (!pt) {
pt = alloc_pt(vm);
if (IS_ERR(pt)) {
ret = PTR_ERR(pt);
goto out;
}
}
if (intel_vgpu_active(vm->i915) ||
gen8_pt_count(*start, end) < I915_PDES)
fill_px(pt, vm->scratch[lvl].encode);
}
if (lvl ||
gen8_pt_count(*start, end) < I915_PDES ||
intel_vgpu_active(vm->i915))
fill_px(pt, vm->scratch[lvl]->encode);
spin_lock(&pd->lock);
if (likely(!pd->entry[idx]))
if (likely(!pd->entry[idx])) {
stash->pt[!!lvl] = pt->stash;
atomic_set(&pt->used, 0);
set_pd_entry(pd, idx, pt);
else
alloc = pt, pt = pd->entry[idx];
} else {
pt = pd->entry[idx];
}
}
if (lvl) {
atomic_inc(&pt->used);
spin_unlock(&pd->lock);
ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
start, end, lvl);
if (unlikely(ret)) {
if (release_pd_entry(pd, idx, pt, scratch))
free_px(vm, pt);
goto out;
}
__gen8_ppgtt_alloc(vm, stash,
as_pd(pt), start, end, lvl);
spin_lock(&pd->lock);
atomic_dec(&pt->used);
@ -359,18 +339,12 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
}
} while (idx++, --len);
spin_unlock(&pd->lock);
out:
if (alloc)
free_px(vm, alloc);
return ret;
}
static int gen8_ppgtt_alloc(struct i915_address_space *vm,
u64 start, u64 length)
static void gen8_ppgtt_alloc(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
u64 start, u64 length)
{
u64 from;
int err;
GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
GEM_BUG_ON(range_overflows(start, length, vm->total));
@ -378,25 +352,9 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
start >>= GEN8_PTE_SHIFT;
length >>= GEN8_PTE_SHIFT;
GEM_BUG_ON(length == 0);
from = start;
err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
&start, start + length, vm->top);
if (unlikely(err && from != start))
__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
from, start, vm->top);
return err;
}
static __always_inline void
write_pte(gen8_pte_t *pte, const gen8_pte_t val)
{
/* Magic delays? Or can we refine these to flush all in one pass? */
*pte = val;
wmb(); /* cpu to cache */
clflush(pte); /* cache to memory */
wmb(); /* visible to all */
__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
&start, start + length, vm->top);
}
static __always_inline u64
@ -415,8 +373,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
write_pte(&vaddr[gen8_pd_index(idx, 0)],
pte_encode | iter->dma);
vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
@ -439,10 +396,12 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = pdp->entry[gen8_pd_index(idx, 2)];
}
clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
}
} while (1);
clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
return idx;
@ -498,7 +457,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
do {
GEM_BUG_ON(iter->sg->length < page_size);
write_pte(&vaddr[index++], encode | iter->dma);
vaddr[index++] = encode | iter->dma;
start += page_size;
iter->dma += page_size;
@ -523,6 +482,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
} while (rem >= page_size && index < I915_PDES);
clflush_cache_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
/*
@ -554,7 +514,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
u16 i;
encode = vma->vm->scratch[0].encode;
encode = vma->vm->scratch[0]->encode;
vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
for (i = 1; i < index; i += 16)
@ -608,27 +568,37 @@ static int gen8_init_scratch(struct i915_address_space *vm)
GEM_BUG_ON(!clone->has_read_only);
vm->scratch_order = clone->scratch_order;
memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
for (i = 0; i <= vm->top; i++)
vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);
return 0;
}
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
ret = setup_scratch_page(vm);
if (ret)
return ret;
vm->scratch[0].encode =
gen8_pte_encode(px_dma(&vm->scratch[0]),
vm->scratch[0]->encode =
gen8_pte_encode(px_dma(vm->scratch[0]),
I915_CACHE_LLC, vm->has_read_only);
for (i = 1; i <= vm->top; i++) {
if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
struct drm_i915_gem_object *obj;
obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(obj))
goto free_scratch;
fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
vm->scratch[i].encode =
gen8_pde_encode(px_dma(&vm->scratch[i]),
I915_CACHE_LLC);
ret = pin_pt_dma(vm, obj);
if (ret) {
i915_gem_object_put(obj);
goto free_scratch;
}
fill_px(obj, vm->scratch[i - 1]->encode);
obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);
vm->scratch[i] = obj;
}
return 0;
@ -649,12 +619,20 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
struct i915_page_directory *pde;
int err;
pde = alloc_pd(vm);
if (IS_ERR(pde))
return PTR_ERR(pde);
fill_px(pde, vm->scratch[1].encode);
err = pin_pt_dma(vm, pde->pt.base);
if (err) {
i915_gem_object_put(pde->pt.base);
free_pd(vm, pde);
return err;
}
fill_px(pde, vm->scratch[1]->encode);
set_pd_entry(pd, idx, pde);
atomic_inc(px_used(pde)); /* keep pinned */
}
@ -668,21 +646,32 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
{
const unsigned int count = gen8_pd_top_count(vm);
struct i915_page_directory *pd;
int err;
GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
GEM_BUG_ON(count > I915_PDES);
pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
pd = __alloc_pd(count);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
if (unlikely(setup_page_dma(vm, px_base(pd)))) {
kfree(pd);
return ERR_PTR(-ENOMEM);
pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(pd->pt.base)) {
err = PTR_ERR(pd->pt.base);
pd->pt.base = NULL;
goto err_pd;
}
fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
err = pin_pt_dma(vm, pd->pt.base);
if (err)
goto err_pd;
fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
atomic_inc(px_used(pd)); /* mark as pinned */
return pd;
err_pd:
free_pd(vm, pd);
return ERR_PTR(err);
}
/*
@ -703,6 +692,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
ppgtt_init(ppgtt, gt);
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
/*
* From bdw, there is hw support for read-only pages in the PPGTT.
@ -714,12 +704,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
*/
ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
/*
* There are only few exceptions for gen >=6. chv and bxt.
* And we are not sure about the latter so play safe for now.
*/
if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
ppgtt->vm.pt_kmap_wc = true;
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
err = gen8_init_scratch(&ppgtt->vm);
if (err)

View file

@ -28,6 +28,8 @@
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
@ -53,33 +55,65 @@ static void irq_disable(struct intel_engine_cs *engine)
spin_unlock(&engine->gt->irq_lock);
}
/*
* Arm the breadcrumb interrupt for @b.
*
* Must be called with b->irq_lock held. This is a no-op when the
* breadcrumbs have no backing irq_engine, when the irq is already armed,
* or when intel_gt_pm_get_if_awake() does not hand us a GT wakeref.
*/
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
lockdep_assert_held(&b->irq_lock);
/* Nothing to arm without a physical engine, or if already armed */
if (!b->irq_engine || b->irq_armed)
return;
/* Stay unarmed if we cannot get a GT wakeref */
if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
return;
/*
* The breadcrumb irq will be disarmed on the interrupt after the
* waiters are signaled. This gives us a single interrupt window in
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
WRITE_ONCE(b->irq_armed, true);
/*
* Since we are waiting on a request, the GPU should be busy
* and should have its own rpm reference. This is tracked
* by i915->gt.awake, we can forgo holding our own wakeref
* for the interrupt as before i915->gt.awake is released (when
* the driver is idle) we disarm the breadcrumbs.
*/
/* irq_enabled is a use count: only the 0 -> 1 transition touches HW */
if (!b->irq_enabled++)
irq_enable(b->irq_engine);
}
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
lockdep_assert_held(&b->irq_lock);
if (!b->irq_engine || !b->irq_armed)
return;
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
irq_disable(engine);
irq_disable(b->irq_engine);
WRITE_ONCE(b->irq_armed, false);
intel_gt_pm_put_async(engine->gt);
intel_gt_pm_put_async(b->irq_engine->gt);
}
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
static void add_signaling_context(struct intel_breadcrumbs *b,
struct intel_context *ce)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
intel_context_get(ce);
list_add_tail(&ce->signal_link, &b->signalers);
if (list_is_first(&ce->signal_link, &b->signalers))
__intel_breadcrumbs_arm_irq(b);
}
if (!READ_ONCE(b->irq_armed))
return;
spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_armed)
__intel_breadcrumbs_disarm_irq(b);
spin_unlock_irqrestore(&b->irq_lock, flags);
/*
* Unlink @ce from the list it is on via ce->signal_link and drop a
* reference to the context.
*
* The unlink must come first: intel_context_put() may release the last
* reference, after which @ce must not be touched.
*/
static void remove_signaling_context(struct intel_breadcrumbs *b,
struct intel_context *ce)
{
list_del(&ce->signal_link);
intel_context_put(ce);
}
static inline bool __request_completed(const struct i915_request *rq)
@ -90,6 +124,9 @@ static inline bool __request_completed(const struct i915_request *rq)
__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
if (rq->context != ce)
return false;
if (!list_is_last(&rq->signal_link, &ce->signals) &&
i915_seqno_passed(rq->fence.seqno,
list_next_entry(rq, signal_link)->fence.seqno))
@ -133,25 +170,21 @@ __dma_fence_signal__notify(struct dma_fence *fence,
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
if (unlikely(intel_engine_is_virtual(engine)))
engine = intel_virtual_engine_get_sibling(engine, 0);
intel_engine_add_retire(engine, tl);
if (b->irq_engine)
intel_engine_add_retire(b->irq_engine, tl);
}
static void __signal_request(struct i915_request *rq, struct list_head *signals)
static bool __signal_request(struct i915_request *rq, struct list_head *signals)
{
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
if (!__dma_fence_signal(&rq->fence))
return;
if (!__dma_fence_signal(&rq->fence)) {
i915_request_put(rq);
return false;
}
i915_request_get(rq);
list_add_tail(&rq->signal_link, signals);
return true;
}
static void signal_irq_work(struct irq_work *work)
@ -164,7 +197,7 @@ static void signal_irq_work(struct irq_work *work)
spin_lock(&b->irq_lock);
if (b->irq_armed && list_empty(&b->signalers))
if (list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
list_splice_init(&b->signaled_requests, &signal);
@ -197,8 +230,8 @@ static void signal_irq_work(struct irq_work *work)
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
if (&ce->signals == pos) { /* now empty */
list_del_init(&ce->signal_link);
add_retire(b, ce->timeline);
remove_signaling_context(b, ce);
}
}
}
@ -220,116 +253,89 @@ static void signal_irq_work(struct irq_work *work)
}
}
static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
struct intel_breadcrumbs *b;
lockdep_assert_held(&b->irq_lock);
if (b->irq_armed)
return true;
if (!intel_gt_pm_get_if_awake(engine->gt))
return false;
/*
* The breadcrumb irq will be disarmed on the interrupt after the
* waiters are signaled. This gives us a single interrupt window in
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
WRITE_ONCE(b->irq_armed, true);
/*
* Since we are waiting on a request, the GPU should be busy
* and should have its own rpm reference. This is tracked
* by i915->gt.awake, we can forgo holding our own wakref
* for the interrupt as before i915->gt.awake is released (when
* the driver is idle) we disarm the breadcrumbs.
*/
if (!b->irq_enabled++)
irq_enable(engine);
return true;
}
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
b = kzalloc(sizeof(*b), GFP_KERNEL);
if (!b)
return NULL;
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
INIT_LIST_HEAD(&b->signaled_requests);
init_irq_work(&b->irq_work, signal_irq_work);
b->irq_engine = irq_engine;
return b;
}
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
if (!b->irq_engine)
return;
spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_enabled)
irq_enable(engine);
irq_enable(b->irq_engine);
else
irq_disable(engine);
irq_disable(b->irq_engine);
spin_unlock_irqrestore(&b->irq_lock, flags);
}
void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
struct intel_context *ce)
void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
if (!READ_ONCE(b->irq_armed))
return;
spin_lock_irqsave(&b->irq_lock, flags);
if (!list_empty(&ce->signals)) {
struct i915_request *rq, *next;
/* Queue for executing the signal callbacks in the irq_work */
list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
GEM_BUG_ON(rq->engine != engine);
GEM_BUG_ON(!__request_completed(rq));
__signal_request(rq, &b->signaled_requests);
}
INIT_LIST_HEAD(&ce->signals);
list_del_init(&ce->signal_link);
irq_work_queue(&b->irq_work);
}
__intel_breadcrumbs_disarm_irq(b);
spin_unlock_irqrestore(&b->irq_lock, flags);
if (!list_empty(&b->signalers))
irq_work_queue(&b->irq_work);
}
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
{
kfree(b);
}
bool i915_request_enable_breadcrumb(struct i915_request *rq)
static void insert_breadcrumb(struct i915_request *rq,
struct intel_breadcrumbs *b)
{
lockdep_assert_held(&rq->lock);
struct intel_context *ce = rq->context;
struct list_head *pos;
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
return true;
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
return;
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
struct intel_context *ce = rq->context;
struct list_head *pos;
i915_request_get(rq);
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
goto unlock;
if (!__intel_breadcrumbs_arm_irq(b))
goto unlock;
/*
* If the request is already completed, we can transfer it
* straight onto a signaled list, and queue the irq worker for
* its signal completion.
*/
if (__request_completed(rq)) {
if (__signal_request(rq, &b->signaled_requests))
irq_work_queue(&b->irq_work);
return;
}
if (list_empty(&ce->signals)) {
add_signaling_context(b, ce);
pos = &ce->signals;
} else {
/*
* We keep the seqno in retirement order, so we can break
* inside intel_engine_signal_breadcrumbs as soon as we've
@ -351,24 +357,75 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
break;
}
list_add(&rq->signal_link, pos);
if (pos == &ce->signals) /* catch transitions from empty list */
list_move_tail(&ce->signal_link, &b->signalers);
GEM_BUG_ON(!check_signal_order(ce, rq));
}
list_add(&rq->signal_link, pos);
GEM_BUG_ON(!check_signal_order(ce, rq));
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
unlock:
/* Check after attaching to irq, interrupt may have already fired. */
if (__request_completed(rq))
irq_work_queue(&b->irq_work);
}
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b;
/* Serialises with i915_request_retire() using rq->lock */
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
return true;
/*
* Peek at i915_request_submit()/i915_request_unsubmit() status.
*
* If the request is not yet active (and not signaled), we will
* attach the breadcrumb later.
*/
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
return true;
/*
* rq->engine is locked by rq->engine->active.lock. That however
* is not known until after rq->engine has been dereferenced and
* the lock acquired. Hence we acquire the lock and then validate
* that rq->engine still matches the lock we hold for it.
*
* Here, we are using the breadcrumb lock as a proxy for the
* rq->engine->active.lock, and we know that since the breadcrumb
* will be serialised within i915_request_submit/i915_request_unsubmit,
* the engine cannot change while active as long as we hold the
* breadcrumb lock on that engine.
*
* From the dma_fence_enable_signaling() path, we are outside of the
* request submit/unsubmit path, and so we must be more careful to
* acquire the right lock.
*/
b = READ_ONCE(rq->engine)->breadcrumbs;
spin_lock(&b->irq_lock);
while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
spin_unlock(&b->irq_lock);
b = READ_ONCE(rq->engine)->breadcrumbs;
spin_lock(&b->irq_lock);
}
return !__request_completed(rq);
/*
* Now that we are finally serialised with request submit/unsubmit,
* [with b->irq_lock] and with i915_request_retire() [via checking
* SIGNALED with rq->lock] confirm the request is indeed active. If
* it is no longer active, the breadcrumb will be attached upon
* i915_request_submit().
*/
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
insert_breadcrumb(rq, b);
spin_unlock(&b->irq_lock);
return true;
}
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
lockdep_assert_held(&rq->lock);
struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
/*
* We must wait for b->irq_lock so that we know the interrupt handler
@ -382,23 +439,19 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
list_del_init(&ce->signal_link);
remove_signaling_context(b, ce);
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
i915_request_put(rq);
}
spin_unlock(&b->irq_lock);
}
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p)
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct intel_context *ce;
struct i915_request *rq;
if (list_empty(&b->signalers))
return;
drm_printf(p, "Signals:\n");
spin_lock_irq(&b->irq_lock);
@ -414,3 +467,17 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
}
spin_unlock_irq(&b->irq_lock);
}
/*
 * Dump the breadcrumb state of @engine to @p: whether the irq is armed
 * and, if any contexts are queued on the signalers list, the pending
 * signals. Engines without breadcrumbs print nothing.
 */
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *crumbs = engine->breadcrumbs;

	if (crumbs) {
		drm_printf(p, "IRQ: %s\n",
			   enableddisabled(crumbs->irq_armed));

		/* Only walk the signal lists when there is something queued */
		if (!list_empty(&crumbs->signalers))
			print_signals(crumbs, p);
	}
}

View file

@ -0,0 +1,36 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2019 Intel Corporation
*/
#ifndef __INTEL_BREADCRUMBS__
#define __INTEL_BREADCRUMBS__
#include <linux/irq_work.h>
#include "intel_engine_types.h"
struct drm_printer;
struct i915_request;
struct intel_breadcrumbs;
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine);
void intel_breadcrumbs_free(struct intel_breadcrumbs *b);
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b);
void intel_breadcrumbs_park(struct intel_breadcrumbs *b);
/*
* Queue the irq_work of @engine's breadcrumbs.
*
* Dereferences engine->breadcrumbs unconditionally, so the engine must
* have breadcrumbs attached.
*/
static inline void
intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
{
irq_work_queue(&engine->breadcrumbs->irq_work);
}
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p);
bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);
#endif /* __INTEL_BREADCRUMBS__ */

View file

@ -0,0 +1,47 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2019 Intel Corporation
*/
#ifndef __INTEL_BREADCRUMBS_TYPES__
#define __INTEL_BREADCRUMBS_TYPES__
#include <linux/irq_work.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>
/*
* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
* heavyweight seqno dance, we delegate the task (of being the
* bottom-half of the user interrupt) to the first client. After
* every interrupt, we wake up one client, who does the heavyweight
* coherent seqno read and either goes back to sleep (if incomplete),
* or wakes up all the completed clients in parallel, before then
* transferring the bottom-half status to the next client in the queue.
*
* Compared to walking the entire list of waiters in a single dedicated
* bottom-half, we reduce the latency of the first waiter by avoiding
* a context switch, but incur additional coherent seqno reads when
* following the chain of request breadcrumbs. Since it is most likely
* that we have a single client waiting on each seqno, then reducing
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
spinlock_t irq_lock; /* protects the lists used in hardirq context */
/* Not all breadcrumbs are attached to physical HW */
struct intel_engine_cs *irq_engine;
/* Contexts with pending signals, linked through their signal_link */
struct list_head signalers;
/* Requests queued for the irq_work to process */
struct list_head signaled_requests;
struct irq_work irq_work; /* for use from inside irq_lock */
/* Use count for the engine irq; 0 -> 1 enables, 1 -> 0 disables */
unsigned int irq_enabled;
/* Set while armed; written with WRITE_ONCE, peeked with READ_ONCE */
bool irq_armed;
};
#endif /* __INTEL_BREADCRUMBS_TYPES__ */

View file

@ -93,85 +93,12 @@ static void intel_context_active_release(struct intel_context *ce)
i915_active_release(&ce->active);
}
int __intel_context_do_pin(struct intel_context *ce)
{
int err;
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
err = intel_context_alloc_state(ce);
if (err)
return err;
}
err = i915_active_acquire(&ce->active);
if (err)
return err;
if (mutex_lock_interruptible(&ce->pin_mutex)) {
err = -EINTR;
goto out_release;
}
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
goto out_unlock;
}
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
goto out_unlock;
err = ce->ops->pin(ce);
if (unlikely(err))
goto err_active;
CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
i915_ggtt_offset(ce->ring->vma),
ce->ring->head, ce->ring->tail);
smp_mb__before_atomic(); /* flush pin before it is visible */
atomic_inc(&ce->pin_count);
}
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
GEM_BUG_ON(i915_active_is_idle(&ce->active));
goto out_unlock;
err_active:
intel_context_active_release(ce);
out_unlock:
mutex_unlock(&ce->pin_mutex);
out_release:
i915_active_release(&ce->active);
return err;
}
void intel_context_unpin(struct intel_context *ce)
{
if (!atomic_dec_and_test(&ce->pin_count))
return;
CE_TRACE(ce, "unpin\n");
ce->ops->unpin(ce);
/*
* Once released, we may asynchronously drop the active reference.
* As that may be the only reference keeping the context alive,
* take an extra now so that it is not freed before we finish
* dereferencing it.
*/
intel_context_get(ce);
intel_context_active_release(ce);
intel_context_put(ce);
}
static int __context_pin_state(struct i915_vma *vma)
static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
int err;
err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
if (err)
return err;
@ -200,11 +127,12 @@ static void __context_unpin_state(struct i915_vma *vma)
__i915_vma_unpin(vma);
}
static int __ring_active(struct intel_ring *ring)
static int __ring_active(struct intel_ring *ring,
struct i915_gem_ww_ctx *ww)
{
int err;
err = intel_ring_pin(ring);
err = intel_ring_pin(ring, ww);
if (err)
return err;
@ -225,6 +153,173 @@ static void __ring_retire(struct intel_ring *ring)
intel_ring_unpin(ring);
}
/*
 * Pin everything the context depends on: the ring, the timeline and,
 * when present, the context state vma, all under the ww context @ww.
 *
 * Returns 0 on success. On failure, whatever was pinned so far is
 * released again in reverse order and the error is returned.
 */
static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
{
	int ret;

	CE_TRACE(ce, "active\n");

	ret = __ring_active(ce->ring, ww);
	if (ret)
		return ret;

	ret = intel_timeline_pin(ce->timeline, ww);
	if (ret)
		goto unwind_ring;

	/* Not every context has a state object to pin */
	if (ce->state) {
		ret = __context_pin_state(ce->state, ww);
		if (ret)
			goto unwind_timeline;
	}

	return 0;

unwind_timeline:
	intel_timeline_unpin(ce->timeline);
unwind_ring:
	__ring_retire(ce->ring);
	return ret;
}
/*
 * Undo intel_context_pre_pin(): release the context state (if any),
 * then the timeline, then the ring.
 */
static void intel_context_post_unpin(struct intel_context *ce)
{
	struct i915_vma *state = ce->state;

	if (state)
		__context_unpin_state(state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}
/*
* Slow path for pinning @ce under the ww acquire context @ww.
*
* Locks the objects backing the timeline HWSP, the ring and the context
* state, takes temporary pins on them, and then, under ce->pin_mutex,
* either bumps an already non-zero pin_count or performs the first pin
* through ce->ops->pin().
*
* Returns 0 on success or a negative error code. The cleanup labels at
* the end are also executed on success: they drop the temporary
* references taken here.
*/
int __intel_context_do_pin_ww(struct intel_context *ce,
struct i915_gem_ww_ctx *ww)
{
bool handoff = false;
void *vaddr;
int err = 0;
/* Allocate the context state on first use */
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
err = intel_context_alloc_state(ce);
if (err)
return err;
}
/*
* We always pin the context/ring/timeline here, to ensure a pin
* refcount for __intel_context_active(), which prevents a lock
* inversion of ce->pin_mutex vs dma_resv_lock().
*/
err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
if (!err && ce->ring->vma->obj)
err = i915_gem_object_lock(ce->ring->vma->obj, ww);
if (!err && ce->state)
err = i915_gem_object_lock(ce->state->obj, ww);
if (!err)
err = intel_context_pre_pin(ce, ww);
/* Object locks taken through @ww are left for the caller to release */
if (err)
return err;
err = i915_active_acquire(&ce->active);
if (err)
goto err_ctx_unpin;
/* Backend preparation, done before taking ce->pin_mutex */
err = ce->ops->pre_pin(ce, ww, &vaddr);
if (err)
goto err_release;
err = mutex_lock_interruptible(&ce->pin_mutex);
if (err)
goto err_post_unpin;
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
goto err_unlock;
}
/* atomic_add_unless() fails only when pin_count is 0: the first pin */
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
goto err_unlock;
err = ce->ops->pin(ce, vaddr);
if (err) {
intel_context_active_release(ce);
goto err_unlock;
}
CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
i915_ggtt_offset(ce->ring->vma),
ce->ring->head, ce->ring->tail);
/* The pin keeps the pre_pin() state; post_unpin() runs at unpin time */
handoff = true;
smp_mb__before_atomic(); /* flush pin before it is visible */
atomic_inc(&ce->pin_count);
}
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
/* Success falls through the labels below as well */
err_unlock:
mutex_unlock(&ce->pin_mutex);
err_post_unpin:
if (!handoff)
ce->ops->post_unpin(ce);
err_release:
i915_active_release(&ce->active);
err_ctx_unpin:
intel_context_post_unpin(ce);
/*
* Unlock the hwsp_ggtt object since it's shared.
* In principle we can unlock all the global state locked above
* since it's pinned and doesn't need fencing, and will
* thus remain resident until it is explicitly unpinned.
*/
i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);
return err;
}
/*
 * Pin @ce using a private, interruptible ww acquire context.
 *
 * The pin is retried for as long as it reports -EDEADLK and the ww
 * backoff succeeds. Any other result, including a backoff failure, is
 * returned to the caller.
 */
int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int ret;

	i915_gem_ww_ctx_init(&ww, true);

	for (;;) {
		ret = __intel_context_do_pin_ww(ce, &ww);
		if (ret != -EDEADLK)
			break;

		/* Lost a ww race: drop our locks and try again */
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (ret)
			break;
	}

	i915_gem_ww_ctx_fini(&ww);
	return ret;
}
/*
 * Drop one pin on @ce. When the last pin goes away the backend unpin
 * and post_unpin hooks are run and the active reference is released.
 */
void intel_context_unpin(struct intel_context *ce)
{
	if (atomic_dec_and_test(&ce->pin_count)) {
		CE_TRACE(ce, "unpin\n");

		ce->ops->unpin(ce);
		ce->ops->post_unpin(ce);

		/*
		 * Releasing the active reference may happen asynchronously
		 * and it may be the only thing keeping the context alive,
		 * so hold our own reference across the release to avoid
		 * the context being freed while we still dereference it.
		 */
		intel_context_get(ce);
		intel_context_active_release(ce);
		intel_context_put(ce);
	}
}
__i915_active_call
static void __intel_context_retire(struct i915_active *active)
{
@ -235,48 +330,29 @@ static void __intel_context_retire(struct i915_active *active)
intel_context_get_avg_runtime_ns(ce));
set_bit(CONTEXT_VALID_BIT, &ce->flags);
if (ce->state)
__context_unpin_state(ce->state);
intel_timeline_unpin(ce->timeline);
__ring_retire(ce->ring);
intel_context_post_unpin(ce);
intel_context_put(ce);
}
static int __intel_context_active(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
int err;
CE_TRACE(ce, "active\n");
intel_context_get(ce);
err = __ring_active(ce->ring);
if (err)
goto err_put;
/* everything should already be activated by intel_context_pre_pin() */
GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
__intel_ring_pin(ce->ring);
err = intel_timeline_pin(ce->timeline);
if (err)
goto err_ring;
__intel_timeline_pin(ce->timeline);
if (!ce->state)
return 0;
err = __context_pin_state(ce->state);
if (err)
goto err_timeline;
if (ce->state) {
GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
__i915_vma_pin(ce->state);
i915_vma_make_unshrinkable(ce->state);
}
return 0;
err_timeline:
intel_timeline_unpin(ce->timeline);
err_ring:
__ring_retire(ce->ring);
err_put:
intel_context_put(ce);
return err;
}
void
@ -382,15 +458,37 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
struct i915_gem_ww_ctx ww;
struct i915_request *rq;
int err;
err = intel_context_pin(ce);
if (unlikely(err))
return ERR_PTR(err);
i915_gem_ww_ctx_init(&ww, true);
retry:
err = intel_context_pin_ww(ce, &ww);
if (!err) {
rq = i915_request_create(ce);
intel_context_unpin(ce);
} else if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
} else {
rq = ERR_PTR(err);
}
rq = i915_request_create(ce);
intel_context_unpin(ce);
i915_gem_ww_ctx_fini(&ww);
if (IS_ERR(rq))
return rq;
/*
* timeline->mutex should be the inner lock, but is used as outer lock.
* Hack around this to shut up lockdep in selftests..
*/
lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
return rq;
}

View file

@ -25,6 +25,8 @@
##__VA_ARGS__); \
} while (0)
struct i915_gem_ww_ctx;
void intel_context_init(struct intel_context *ce,
struct intel_engine_cs *engine);
void intel_context_fini(struct intel_context *ce);
@ -81,6 +83,8 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
}
int __intel_context_do_pin(struct intel_context *ce);
int __intel_context_do_pin_ww(struct intel_context *ce,
struct i915_gem_ww_ctx *ww);
static inline bool intel_context_pin_if_active(struct intel_context *ce)
{
@ -95,6 +99,15 @@ static inline int intel_context_pin(struct intel_context *ce)
return __intel_context_do_pin(ce);
}
/*
 * Pin @ce as part of the ww transaction @ww.
 *
 * An already active context is pinned on the fast path without any
 * locking; otherwise fall back to __intel_context_do_pin_ww().
 */
static inline int intel_context_pin_ww(struct intel_context *ce,
				       struct i915_gem_ww_ctx *ww)
{
	if (unlikely(!intel_context_pin_if_active(ce)))
		return __intel_context_do_pin_ww(ce, ww);

	return 0;
}
static inline void __intel_context_pin(struct intel_context *ce)
{
GEM_BUG_ON(!intel_context_is_pinned(ce));

View file

@ -23,6 +23,7 @@
DECLARE_EWMA(runtime, 3, 8);
struct i915_gem_context;
struct i915_gem_ww_ctx;
struct i915_vma;
struct intel_context;
struct intel_ring;
@ -30,8 +31,10 @@ struct intel_ring;
struct intel_context_ops {
int (*alloc)(struct intel_context *ce);
int (*pin)(struct intel_context *ce);
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce);
void (*post_unpin)(struct intel_context *ce);
void (*enter)(struct intel_context *ce);
void (*exit)(struct intel_context *ce);

View file

@ -223,26 +223,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
void intel_engine_init_execlists(struct intel_engine_cs *engine);
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
static inline void
intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
{
irq_work_queue(&engine->breadcrumbs.irq_work);
}
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
struct intel_context *ce);
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
struct drm_printer *p);
static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));

View file

@ -28,6 +28,7 @@
#include "i915_drv.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
@ -634,7 +635,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
else
flags = PIN_HIGH;
return i915_ggtt_pin(vma, 0, flags);
return i915_ggtt_pin(vma, NULL, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
@ -700,8 +701,13 @@ static int engine_setup_common(struct intel_engine_cs *engine)
if (err)
return err;
engine->breadcrumbs = intel_breadcrumbs_create(engine);
if (!engine->breadcrumbs) {
err = -ENOMEM;
goto err_status;
}
intel_engine_init_active(engine, ENGINE_PHYSICAL);
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
@ -716,6 +722,10 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_ctx_wa(engine);
return 0;
err_status:
cleanup_status_page(engine);
return err;
}
struct measure_breadcrumb {
@ -785,9 +795,11 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
}
static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
create_pinned_context(struct intel_engine_cs *engine,
unsigned int hwsp,
struct lock_class_key *key,
const char *name)
{
static struct lock_class_key kernel;
struct intel_context *ce;
int err;
@ -796,6 +808,7 @@ create_kernel_context(struct intel_engine_cs *engine)
return ce;
__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
ce->timeline = page_pack_bits(NULL, hwsp);
err = intel_context_pin(ce); /* perma-pin so it is always available */
if (err) {
@ -809,11 +822,20 @@ create_kernel_context(struct intel_engine_cs *engine)
* should we need to inject GPU operations during their request
* construction.
*/
lockdep_set_class(&ce->timeline->mutex, &kernel);
lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
return ce;
}
/*
 * Build the engine's kernel context: a pinned context created through
 * create_pinned_context() with I915_GEM_HWS_SEQNO_ADDR as its hwsp value
 * and a timeline mutex placed in its own "kernel_context" lockdep class.
 */
static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
	/* One lockdep key shared by every caller of this function. */
	static struct lock_class_key kernel;
	struct intel_context *ce;

	ce = create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR,
				   &kernel, "kernel_context");

	return ce;
}
/**
* intel_engines_init_common - initialize engine state which might require hw access
* @engine: Engine to initialize.
@ -902,9 +924,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
cleanup_status_page(engine);
intel_breadcrumbs_free(engine->breadcrumbs);
intel_engine_fini_retire(engine);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
if (engine->default_state)

View file

@ -6,6 +6,7 @@
#include "i915_drv.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_heartbeat.h"
@ -247,7 +248,7 @@ static int __engine_park(struct intel_wakeref *wf)
call_idle_barriers(engine); /* cleanup after wedging */
intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
intel_breadcrumbs_park(engine->breadcrumbs);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);

View file

@ -22,6 +22,7 @@
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "intel_breadcrumbs_types.h"
#include "intel_sseu.h"
#include "intel_timeline_types.h"
#include "intel_uncore.h"
@ -373,34 +374,8 @@ struct intel_engine_cs {
*/
struct ewma__engine_latency latency;
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
* heavyweight seqno dance, we delegate the task (of being the
* bottom-half of the user interrupt) to the first client. After
* every interrupt, we wake up one client, who does the heavyweight
* coherent seqno read and either goes back to sleep (if incomplete),
* or wakes up all the completed clients in parallel, before then
* transferring the bottom-half status to the next client in the queue.
*
* Compared to walking the entire list of waiters in a single dedicated
* bottom-half, we reduce the latency of the first waiter by avoiding
* a context switch, but incur additional coherent seqno reads when
* following the chain of request breadcrumbs. Since it is most likely
* that we have a single client waiting on each seqno, then reducing
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
spinlock_t irq_lock;
struct list_head signalers;
struct list_head signaled_requests;
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
bool irq_armed;
} breadcrumbs;
/* Keep track of all the seqno used, a trail of breadcrumbs */
struct intel_breadcrumbs *breadcrumbs;
struct intel_engine_pmu {
/**

View file

@ -78,8 +78,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
{
int ret;
stash_init(&i915->mm.wc_stash);
/*
* Note that we use page colouring to enforce a guard page at the
* end of the address space. This is required as the CS may prefetch
@ -232,7 +230,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
/* Fill the allocated but "unused" space beyond the end of the buffer */
while (gte < end)
gen8_set_pte(gte++, vm->scratch[0].encode);
gen8_set_pte(gte++, vm->scratch[0]->encode);
/*
* We want to flush the TLBs only after we're certain all the PTE
@ -283,7 +281,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
/* Fill the allocated but "unused" space beyond the end of the buffer */
while (gte < end)
iowrite32(vm->scratch[0].encode, gte++);
iowrite32(vm->scratch[0]->encode, gte++);
/*
* We want to flush the TLBs only after we're certain all the PTE
@ -303,7 +301,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
const gen8_pte_t scratch_pte = vm->scratch[0].encode;
const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
gen8_pte_t __iomem *gtt_base =
(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
const int max_entries = ggtt_total_entries(ggtt) - first_entry;
@ -401,7 +399,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries))
num_entries = max_entries;
scratch_pte = vm->scratch[0].encode;
scratch_pte = vm->scratch[0]->encode;
for (i = 0; i < num_entries; i++)
iowrite32(scratch_pte, &gtt_base[i]);
}
@ -436,16 +434,17 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}
static int ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
static void ggtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
struct drm_i915_gem_object *obj = vma->obj;
u32 pte_flags;
if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
return 0;
return;
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
pte_flags = 0;
@ -454,8 +453,6 @@ static int ggtt_bind_vma(struct i915_address_space *vm,
vm->insert_entries(vm, vma, cache_level, pte_flags);
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
return 0;
}
static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@ -568,31 +565,25 @@ err:
return ret;
}
static int aliasing_gtt_bind_vma(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
u32 pte_flags;
int ret;
/* Currently applicable only to VLV */
pte_flags = 0;
if (i915_gem_object_is_readonly(vma->obj))
pte_flags |= PTE_READ_ONLY;
if (flags & I915_VMA_LOCAL_BIND) {
struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias;
ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags);
if (ret)
return ret;
}
if (flags & I915_VMA_LOCAL_BIND)
ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
stash, vma, cache_level, flags);
if (flags & I915_VMA_GLOBAL_BIND)
vm->insert_entries(vm, vma, cache_level, pte_flags);
return 0;
}
static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
@ -607,6 +598,7 @@ static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
struct i915_vm_pt_stash stash = {};
struct i915_ppgtt *ppgtt;
int err;
@ -619,15 +611,21 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
goto err_ppgtt;
}
err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
if (err)
goto err_ppgtt;
err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
if (err)
goto err_stash;
/*
* Note we only pre-allocate as far as the end of the global
* GTT. On 48b / 4-level page-tables, the difference is very,
* very significant! We have to preallocate as GVT/vgpu does
* not like the page directory disappearing.
*/
err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
if (err)
goto err_ppgtt;
ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
ggtt->alias = ppgtt;
ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
@ -638,8 +636,11 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
return 0;
err_stash:
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
i915_vm_put(&ppgtt->vm);
return err;
@ -715,18 +716,11 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
void i915_ggtt_driver_release(struct drm_i915_private *i915)
{
struct i915_ggtt *ggtt = &i915->ggtt;
struct pagevec *pvec;
fini_aliasing_ppgtt(ggtt);
intel_ggtt_fini_fences(ggtt);
ggtt_cleanup_hw(ggtt);
pvec = &i915->mm.wc_stash.pvec;
if (pvec->nr) {
set_pages_array_wb(pvec->pages, pvec->nr);
__pagevec_release(pvec);
}
}
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@ -789,7 +783,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return -ENOMEM;
}
ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
ret = setup_scratch_page(&ggtt->vm);
if (ret) {
drm_err(&i915->drm, "Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
@ -797,8 +791,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return ret;
}
ggtt->vm.scratch[0].encode =
ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
ggtt->vm.scratch[0]->encode =
ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
I915_CACHE_NONE, 0);
return 0;
@ -824,7 +818,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
iounmap(ggtt->gsm);
cleanup_scratch_page(vm);
free_scratch(vm);
}
static struct resource pci_resource(struct pci_dev *pdev, int bar)
@ -852,6 +846,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
else
size = gen8_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.cleanup = gen6_gmch_remove;
ggtt->vm.insert_page = gen8_ggtt_insert_page;
@ -1000,6 +996,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
size = gen6_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
ggtt->vm.clear_range = gen6_ggtt_clear_range;
@ -1050,6 +1048,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->gmadr =
(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
ggtt->do_idle_maps = needs_idle_maps(i915);
ggtt->vm.insert_page = i915_ggtt_insert_page;
ggtt->vm.insert_entries = i915_ggtt_insert_entries;
@ -1165,11 +1165,6 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
ggtt->invalidate(ggtt);
}
/*
 * Atomically strip every I915_VMA_BIND_MASK bit from vma->flags and hand
 * back the complete flags word as it stood just before the bits were cleared.
 */
static unsigned int clear_bind(struct i915_vma *vma)
{
	unsigned int old_flags;

	old_flags = atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags);

	return old_flags;
}
void i915_ggtt_resume(struct i915_ggtt *ggtt)
{
struct i915_vma *vma;
@ -1187,11 +1182,13 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
/* clflush objects bound into the GGTT and rebind them. */
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
unsigned int was_bound = clear_bind(vma);
unsigned int was_bound =
atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
WARN_ON(i915_vma_bind(vma,
obj ? obj->cache_level : 0,
was_bound, NULL));
GEM_BUG_ON(!was_bound);
vma->ops->bind_vma(&ggtt->vm, NULL, vma,
obj ? obj->cache_level : 0,
was_bound);
if (obj) { /* only used during resume => exclusive access */
flush |= fetch_and_zero(&obj->write_domain);
obj->read_domains |= I915_GEM_DOMAIN_GTT;

View file

@ -356,7 +356,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
goto err_unref;
}
ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (ret)
goto err_unref;
@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt)
/* We must be able to switch to something! */
GEM_BUG_ON(!engine->kernel_context);
err = intel_renderstate_init(&so, engine);
if (err)
goto out;
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out;
}
rq = intel_context_create_request(ce);
err = intel_renderstate_init(&so, ce);
if (err)
goto err;
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
intel_context_put(ce);
goto out;
goto err_fini;
}
err = intel_engine_emit_ctx_wa(rq);
@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
err_rq:
requests[id] = i915_request_get(rq);
i915_request_add(rq);
intel_renderstate_fini(&so);
if (err)
err_fini:
intel_renderstate_fini(&so, ce);
err:
if (err) {
intel_context_put(ce);
goto out;
}
}
/* Flush the default context image to memory, and enable powersaving. */

View file

@ -35,39 +35,65 @@ static void node_free(struct intel_gt_buffer_pool_node *node)
{
i915_gem_object_put(node->obj);
i915_active_fini(&node->active);
kfree(node);
kfree_rcu(node, rcu);
}
/*
 * pool_free_older_than - reap cached nodes that have been idle for @keep jiffies
 * @pool: buffer pool whose cache_list buckets are scanned
 * @keep: minimum idle time in jiffies before a node is reaped; with 0 every
 *        node carrying a non-zero age qualifies
 *
 * Claimed nodes are collected on a private chain while pool->lock is held
 * and are only handed to node_free() once the lock has been dropped.
 *
 * Returns true if any bucket still holds nodes after the scan, false if
 * every bucket was found empty.
 */
static bool pool_free_older_than(struct intel_gt_buffer_pool *pool, long keep)
{
struct intel_gt_buffer_pool_node *node, *stale = NULL;
bool active = false;
int n;
/* Free buffers that have not been used for at least @keep jiffies */
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
struct list_head *list = &pool->cache_list[n];
/* Unlocked peek: nothing to reap and nothing to report for this bucket */
if (list_empty(list))
continue;
/* Never spin here: if the lock is contended, skip reaping this bucket on this pass */
if (spin_trylock_irq(&pool->lock)) {
struct list_head *pos;
/* Most recent at head; oldest at tail */
list_for_each_prev(pos, list) {
unsigned long age;
node = list_entry(pos, typeof(*node), link);
/* age == 0 means the node is not idle (in use or already claimed); stop at it, or at the first node still too young */
age = READ_ONCE(node->age);
if (!age || jiffies - age < keep)
break;
/* Check we are the first to claim this node */
if (!xchg(&node->age, 0))
break;
/* Chain the claimed node onto the private stale list through node->free */
node->free = stale;
stale = node;
}
/*
 * pos is the first node we did not claim (or the list head if all were
 * claimed). If pos is still the tail entry, nothing was claimed.
 * NOTE(review): __list_del_many() is not visible here; presumably it
 * unlinks everything between pos and the list head in one step — confirm.
 */
if (!list_is_last(pos, list))
__list_del_many(pos, list);
spin_unlock_irq(&pool->lock);
}
/* Report the bucket as active if anything remains on it, whether or not we got the lock */
active |= !list_empty(list);
}
/* Release the claimed nodes now that pool->lock is no longer held */
while ((node = stale)) {
stale = stale->free;
node_free(node);
}
return active;
}
static void pool_free_work(struct work_struct *wrk)
{
struct intel_gt_buffer_pool *pool =
container_of(wrk, typeof(*pool), work.work);
struct intel_gt_buffer_pool_node *node, *next;
unsigned long old = jiffies - HZ;
bool active = false;
LIST_HEAD(stale);
int n;
/* Free buffers that have not been used in the past second */
spin_lock_irq(&pool->lock);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
struct list_head *list = &pool->cache_list[n];
/* Most recent at head; oldest at tail */
list_for_each_entry_safe_reverse(node, next, list, link) {
if (time_before(node->age, old))
break;
list_move(&node->link, &stale);
}
active |= !list_empty(list);
}
spin_unlock_irq(&pool->lock);
list_for_each_entry_safe(node, next, &stale, link)
node_free(node);
if (active)
if (pool_free_older_than(pool, HZ))
schedule_delayed_work(&pool->work,
round_jiffies_up_relative(HZ));
}
@ -109,8 +135,8 @@ static void pool_retire(struct i915_active *ref)
i915_gem_object_make_purgeable(node->obj);
spin_lock_irqsave(&pool->lock, flags);
node->age = jiffies;
list_add(&node->link, list);
list_add_rcu(&node->link, list);
WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
spin_unlock_irqrestore(&pool->lock, flags);
schedule_delayed_work(&pool->work,
@ -151,20 +177,30 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
struct intel_gt_buffer_pool_node *node;
struct list_head *list;
unsigned long flags;
int ret;
size = PAGE_ALIGN(size);
list = bucket_for_size(pool, size);
spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry(node, list, link) {
rcu_read_lock();
list_for_each_entry_rcu(node, list, link) {
unsigned long age;
if (node->obj->base.size < size)
continue;
list_del(&node->link);
break;
age = READ_ONCE(node->age);
if (!age)
continue;
if (cmpxchg(&node->age, age, 0) == age) {
spin_lock_irq(&pool->lock);
list_del_rcu(&node->link);
spin_unlock_irq(&pool->lock);
break;
}
}
spin_unlock_irqrestore(&pool->lock, flags);
rcu_read_unlock();
if (&node->link == list) {
node = node_create(pool, size);
@ -192,28 +228,13 @@ void intel_gt_init_buffer_pool(struct intel_gt *gt)
INIT_DELAYED_WORK(&pool->work, pool_free_work);
}
/*
 * Immediately release every node cached in the pool: with pool->lock held,
 * pass each node on each cache_list bucket to node_free() and then reset
 * the bucket to an empty list head.
 */
static void pool_free_imm(struct intel_gt_buffer_pool *pool)
{
	struct intel_gt_buffer_pool_node *cur, *tmp;
	struct list_head *bucket;
	int idx = 0;

	spin_lock_irq(&pool->lock);
	while (idx < ARRAY_SIZE(pool->cache_list)) {
		bucket = &pool->cache_list[idx++];

		/* The nodes are freed without being unlinked ... */
		list_for_each_entry_safe(cur, tmp, bucket, link)
			node_free(cur);

		/* ... so reinitialise the head instead of trusting its links. */
		INIT_LIST_HEAD(bucket);
	}
	spin_unlock_irq(&pool->lock);
}
void intel_gt_flush_buffer_pool(struct intel_gt *gt)
{
struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
do {
pool_free_imm(pool);
while (pool_free_older_than(pool, 0))
;
} while (cancel_delayed_work_sync(&pool->work));
}

View file

@ -25,7 +25,11 @@ struct intel_gt_buffer_pool_node {
struct i915_active active;
struct drm_i915_gem_object *obj;
struct list_head link;
struct intel_gt_buffer_pool *pool;
union {
struct intel_gt_buffer_pool *pool;
struct intel_gt_buffer_pool_node *free;
struct rcu_head rcu;
};
unsigned long age;
};

View file

@ -8,6 +8,7 @@
#include "i915_drv.h"
#include "i915_irq.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_uncore.h"

View file

@ -11,160 +11,24 @@
#include "intel_gt.h"
#include "intel_gtt.h"
void stash_init(struct pagestash *stash)
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
{
pagevec_init(&stash->pvec);
spin_lock_init(&stash->lock);
}
static struct page *stash_pop_page(struct pagestash *stash)
{
struct page *page = NULL;
spin_lock(&stash->lock);
if (likely(stash->pvec.nr))
page = stash->pvec.pages[--stash->pvec.nr];
spin_unlock(&stash->lock);
return page;
}
static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
{
unsigned int nr;
spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
memcpy(stash->pvec.pages + stash->pvec.nr,
pvec->pages + pvec->nr - nr,
sizeof(pvec->pages[0]) * nr);
stash->pvec.nr += nr;
spin_unlock(&stash->lock);
pvec->nr -= nr;
}
static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
struct pagevec stack;
struct page *page;
if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
i915_gem_shrink_all(vm->i915);
page = stash_pop_page(&vm->free_pages);
if (page)
return page;
if (!vm->pt_kmap_wc)
return alloc_page(gfp);
/* Look in our global stash of WC pages... */
page = stash_pop_page(&vm->i915->mm.wc_stash);
if (page)
return page;
/*
* Otherwise batch allocate pages to amortize cost of set_pages_wc.
*
* We have to be careful as page allocation may trigger the shrinker
* (via direct reclaim) which will fill up the WC stash underneath us.
* So we add our WB pages into a temporary pvec on the stack and merge
* them into the WC stash after all the allocations are complete.
*/
pagevec_init(&stack);
do {
struct page *page;
page = alloc_page(gfp);
if (unlikely(!page))
break;
stack.pages[stack.nr++] = page;
} while (pagevec_space(&stack));
if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
page = stack.pages[--stack.nr];
/* Merge spare WC pages to the global stash */
if (stack.nr)
stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
/* Push any surplus WC pages onto the local VM stash */
if (stack.nr)
stash_push_pagevec(&vm->free_pages, &stack);
}
/* Return unwanted leftovers */
if (unlikely(stack.nr)) {
WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
__pagevec_release(&stack);
}
return page;
return i915_gem_object_create_internal(vm->i915, sz);
}
static void vm_free_pages_release(struct i915_address_space *vm,
bool immediate)
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
{
struct pagevec *pvec = &vm->free_pages.pvec;
struct pagevec stack;
int err;
lockdep_assert_held(&vm->free_pages.lock);
GEM_BUG_ON(!pagevec_count(pvec));
err = i915_gem_object_pin_pages(obj);
if (err)
return err;
if (vm->pt_kmap_wc) {
/*
* When we use WC, first fill up the global stash and then
* only if full immediately free the overflow.
*/
stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
/*
* As we have made some room in the VM's free_pages,
* we can wait for it to fill again. Unless we are
* inside i915_address_space_fini() and must
* immediately release the pages!
*/
if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
return;
/*
* We have to drop the lock to allow ourselves to sleep,
* so take a copy of the pvec and clear the stash for
* others to use it as we sleep.
*/
stack = *pvec;
pagevec_reinit(pvec);
spin_unlock(&vm->free_pages.lock);
pvec = &stack;
set_pages_array_wb(pvec->pages, pvec->nr);
spin_lock(&vm->free_pages.lock);
}
__pagevec_release(pvec);
}
static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
/*
* On !llc, we need to change the pages back to WB. We only do so
* in bulk, so we rarely need to change the page attributes here,
* but doing so requires a stop_machine() from deep inside arch/x86/mm.
* To make detection of the possible sleep more likely, use an
* unconditional might_sleep() for everybody.
*/
might_sleep();
spin_lock(&vm->free_pages.lock);
while (!pagevec_space(&vm->free_pages.pvec))
vm_free_pages_release(vm, false);
GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
pagevec_add(&vm->free_pages.pvec, page);
spin_unlock(&vm->free_pages.lock);
i915_gem_object_make_unshrinkable(obj);
return 0;
}
void __i915_vm_close(struct i915_address_space *vm)
@ -194,14 +58,7 @@ void __i915_vm_close(struct i915_address_space *vm)
void i915_address_space_fini(struct i915_address_space *vm)
{
spin_lock(&vm->free_pages.lock);
if (pagevec_count(&vm->free_pages.pvec))
vm_free_pages_release(vm, true);
GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
spin_unlock(&vm->free_pages.lock);
drm_mm_takedown(&vm->mm);
mutex_destroy(&vm->mutex);
}
@ -246,8 +103,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
drm_mm_init(&vm->mm, 0, vm->total);
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
stash_init(&vm->free_pages);
INIT_LIST_HEAD(&vm->bound_list);
}
@ -264,64 +119,50 @@ void clear_pages(struct i915_vma *vma)
memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}
static int __setup_page_dma(struct i915_address_space *vm,
struct i915_page_dma *p,
gfp_t gfp)
dma_addr_t __px_dma(struct drm_i915_gem_object *p)
{
p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
if (unlikely(!p->page))
return -ENOMEM;
p->daddr = dma_map_page_attrs(vm->dma,
p->page, 0, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC |
DMA_ATTR_NO_WARN);
if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
vm_free_page(vm, p->page);
return -ENOMEM;
}
return 0;
GEM_BUG_ON(!i915_gem_object_has_pages(p));
return sg_dma_address(p->mm.pages->sgl);
}
int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
struct page *__px_page(struct drm_i915_gem_object *p)
{
return __setup_page_dma(vm, p, __GFP_HIGHMEM);
}
void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
{
dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
vm_free_page(vm, p->page);
GEM_BUG_ON(!i915_gem_object_has_pages(p));
return sg_page(p->mm.pages->sgl);
}
void
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
{
kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
struct page *page = __px_page(p);
void *vaddr;
vaddr = kmap(page);
memset64(vaddr, val, count);
clflush_cache_range(vaddr, PAGE_SIZE);
kunmap(page);
}
static void poison_scratch_page(struct page *page, unsigned long size)
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
{
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
return;
struct sgt_iter sgt;
struct page *page;
u8 val;
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
val = 0;
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
val = POISON_FREE;
do {
for_each_sgt_page(page, sgt, scratch->mm.pages) {
void *vaddr;
vaddr = kmap(page);
memset(vaddr, POISON_FREE, PAGE_SIZE);
memset(vaddr, val, PAGE_SIZE);
kunmap(page);
page = pfn_to_page(page_to_pfn(page) + 1);
size -= PAGE_SIZE;
} while (size);
}
}
int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
int setup_scratch_page(struct i915_address_space *vm)
{
unsigned long size;
@ -338,21 +179,27 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
*/
size = I915_GTT_PAGE_SIZE_4K;
if (i915_vm_is_4lvl(vm) &&
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
size = I915_GTT_PAGE_SIZE_64K;
gfp |= __GFP_NOWARN;
}
gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
do {
unsigned int order = get_order(size);
struct page *page;
dma_addr_t addr;
struct drm_i915_gem_object *obj;
page = alloc_pages(gfp, order);
if (unlikely(!page))
obj = vm->alloc_pt_dma(vm, size);
if (IS_ERR(obj))
goto skip;
if (pin_pt_dma(vm, obj))
goto skip_obj;
/* We need a single contiguous page for our scratch */
if (obj->mm.page_sizes.sg < size)
goto skip_obj;
/* And it needs to be correspondingly aligned */
if (__px_dma(obj) & (size - 1))
goto skip_obj;
/*
* Use a non-zero scratch page for debugging.
*
@ -362,61 +209,28 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
* should it ever be accidentally used, the effect should be
* fairly benign.
*/
poison_scratch_page(page, size);
poison_scratch_page(obj);
addr = dma_map_page_attrs(vm->dma,
page, 0, size,
PCI_DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC |
DMA_ATTR_NO_WARN);
if (unlikely(dma_mapping_error(vm->dma, addr)))
goto free_page;
if (unlikely(!IS_ALIGNED(addr, size)))
goto unmap_page;
vm->scratch[0].base.page = page;
vm->scratch[0].base.daddr = addr;
vm->scratch_order = order;
vm->scratch[0] = obj;
vm->scratch_order = get_order(size);
return 0;
unmap_page:
dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
free_page:
__free_pages(page, order);
skip_obj:
i915_gem_object_put(obj);
skip:
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
size = I915_GTT_PAGE_SIZE_4K;
gfp &= ~__GFP_NOWARN;
} while (1);
}
/*
 * Tear down the level-0 scratch page of @vm: undo its DMA mapping, sized
 * by vm->scratch_order, then return the backing pages to the allocator.
 */
void cleanup_scratch_page(struct i915_address_space *vm)
{
	unsigned int order = vm->scratch_order;
	struct i915_page_dma *scratch = px_base(&vm->scratch[0]);

	dma_unmap_page(vm->dma, scratch->daddr,
		       BIT(order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);

	__free_pages(scratch->page, order);
}
void free_scratch(struct i915_address_space *vm)
{
int i;
if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
return;
for (i = 1; i <= vm->top; i++) {
if (!px_dma(&vm->scratch[i]))
break;
cleanup_page_dma(vm, px_base(&vm->scratch[i]));
}
cleanup_scratch_page(vm);
for (i = 0; i <= vm->top; i++)
i915_gem_object_put(vm->scratch[i]);
}
void gtt_write_workarounds(struct intel_gt *gt)

View file

@ -134,38 +134,29 @@ typedef u64 gen8_pte_t;
#define GEN8_PDE_IPS_64K BIT(11)
#define GEN8_PDE_PS_2M BIT(7)
enum i915_cache_level;
struct drm_i915_file_private;
struct drm_i915_gem_object;
struct i915_fence_reg;
struct i915_vma;
struct intel_gt;
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
struct i915_page_dma {
struct page *page;
union {
dma_addr_t daddr;
/*
* For gen6/gen7 only. This is the offset in the GGTT
* where the page directory entries for PPGTT begin
*/
u32 ggtt_offset;
};
};
/* A scratch page together with its precomputed page-table entry value. */
struct i915_page_scratch {
/* Backing page and its DMA address */
struct i915_page_dma base;
/* Encoded entry written into page tables to point at this scratch page */
u64 encode;
};
struct i915_page_table {
struct i915_page_dma base;
atomic_t used;
struct drm_i915_gem_object *base;
union {
atomic_t used;
struct i915_page_table *stash;
};
};
struct i915_page_directory {
struct i915_page_table pt;
spinlock_t lock;
void *entry[512];
void **entry;
};
#define __px_choose_expr(x, type, expr, other) \
@ -176,12 +167,14 @@ struct i915_page_directory {
other)
#define px_base(px) \
__px_choose_expr(px, struct i915_page_dma *, __x, \
__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
__px_choose_expr(px, struct i915_page_table *, &__x->base, \
__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
(void)0))))
#define px_dma(px) (px_base(px)->daddr)
__px_choose_expr(px, struct drm_i915_gem_object *, __x, \
__px_choose_expr(px, struct i915_page_table *, __x->base, \
__px_choose_expr(px, struct i915_page_directory *, __x->pt.base, \
(void)0)))
struct page *__px_page(struct drm_i915_gem_object *p);
dma_addr_t __px_dma(struct drm_i915_gem_object *p);
#define px_dma(px) (__px_dma(px_base(px)))
#define px_pt(px) \
__px_choose_expr(px, struct i915_page_table *, __x, \
@ -189,19 +182,18 @@ struct i915_page_directory {
(void)0))
#define px_used(px) (&px_pt(px)->used)
enum i915_cache_level;
struct drm_i915_file_private;
struct drm_i915_gem_object;
struct i915_vma;
struct intel_gt;
/*
 * Page tables/directories allocated ahead of time and passed to the
 * bind_vma()/allocate_va_range() callbacks, which now return void.
 */
struct i915_vm_pt_stash {
/* preallocated chains of page tables/directories */
/* NOTE(review): presumably [0] chains page tables and [1] page directories, linked via i915_page_table.stash — confirm */
struct i915_page_table *pt[2];
};
struct i915_vma_ops {
/* Map an object into an address space with the given cache flags. */
int (*bind_vma)(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags);
void (*bind_vma)(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags);
/*
* Unmap an object from an address space. This usually consists of
* setting the valid PTE entries to a reserved scratch page.
@ -213,13 +205,6 @@ struct i915_vma_ops {
void (*clear_pages)(struct i915_vma *vma);
};
/* A small cache of spare pages, set up by stash_init(). */
struct pagestash {
/* Taken around accesses to pvec by the stash helpers */
spinlock_t lock;
/* The cached pages; pvec.nr counts how many are currently held */
struct pagevec pvec;
};
void stash_init(struct pagestash *stash);
struct i915_address_space {
struct kref ref;
struct rcu_work rcu;
@ -256,33 +241,33 @@ struct i915_address_space {
#define VM_CLASS_GGTT 0
#define VM_CLASS_PPGTT 1
struct i915_page_scratch scratch[4];
unsigned int scratch_order;
unsigned int top;
struct drm_i915_gem_object *scratch[4];
/**
* List of vma currently bound.
*/
struct list_head bound_list;
struct pagestash free_pages;
/* Global GTT */
bool is_ggtt:1;
/* Some systems require uncached updates of the page directories */
bool pt_kmap_wc:1;
/* Some systems support read-only mappings for GGTT and/or PPGTT */
bool has_read_only:1;
u8 top;
u8 pd_shift;
u8 scratch_order;
struct drm_i915_gem_object *
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
u64 (*pte_encode)(dma_addr_t addr,
enum i915_cache_level level,
u32 flags); /* Create a valid PTE */
#define PTE_READ_ONLY BIT(0)
int (*allocate_va_range)(struct i915_address_space *vm,
u64 start, u64 length);
void (*allocate_va_range)(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
u64 start, u64 length);
void (*clear_range)(struct i915_address_space *vm,
u64 start, u64 length);
void (*insert_page)(struct i915_address_space *vm,
@ -490,9 +475,9 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
{
struct i915_page_dma *pt = ppgtt->pd->entry[n];
struct i915_page_table *pt = ppgtt->pd->entry[n];
return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
}
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
@ -517,13 +502,10 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
void i915_ggtt_suspend(struct i915_ggtt *gtt);
void i915_ggtt_resume(struct i915_ggtt *ggtt);
int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px)))
void
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
#define fill32_px(px, v) do { \
@ -531,47 +513,51 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
fill_px((px), v__ << 32 | v__); \
} while (0)
int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
void cleanup_scratch_page(struct i915_address_space *vm);
int setup_scratch_page(struct i915_address_space *vm);
void free_scratch(struct i915_address_space *vm);
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
struct i915_page_table *alloc_pt(struct i915_address_space *vm);
struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
struct i915_page_directory *__alloc_pd(size_t sz);
struct i915_page_directory *__alloc_pd(int npde);
void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
#define free_px(vm, px) free_pd(vm, px_base(px))
void free_px(struct i915_address_space *vm,
struct i915_page_table *pt, int lvl);
#define free_pt(vm, px) free_px(vm, px, 0)
#define free_pd(vm, px) free_px(vm, px_pt(px), 1)
void
__set_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_dma * const to,
struct i915_page_table *pt,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
#define set_pd_entry(pd, idx, to) \
__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
__set_pd_entry((pd), (idx), px_pt(to), gen8_pde_encode)
void
clear_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
const struct i915_page_scratch * const scratch);
const struct drm_i915_gem_object * const scratch);
bool
release_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_table * const pt,
const struct i915_page_scratch * const scratch);
const struct drm_i915_gem_object * const scratch);
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
int ggtt_set_pages(struct i915_vma *vma);
int ppgtt_set_pages(struct i915_vma *vma);
void clear_pages(struct i915_vma *vma);
int ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags);
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags);
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma *vma);
@ -579,6 +565,14 @@ void gtt_write_workarounds(struct intel_gt *gt);
void setup_private_pat(struct intel_uncore *uncore);
int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
u64 size);
int i915_vm_pin_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash);
void i915_vm_free_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash);
static inline struct sgt_dma {
struct scatterlist *sg;
dma_addr_t dma, max;

View file

@ -137,6 +137,7 @@
#include "i915_perf.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
@ -1148,20 +1149,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
} else {
struct intel_engine_cs *owner = rq->context->engine;
/*
* Decouple the virtual breadcrumb before moving it
* back to the virtual engine -- we don't want the
* request to complete in the background and try
* and cancel the breadcrumb on the virtual engine
* (instead of the old engine where it is linked)!
*/
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&rq->fence.flags)) {
spin_lock_nested(&rq->lock,
SINGLE_DEPTH_NESTING);
i915_request_cancel_breadcrumb(rq);
spin_unlock(&rq->lock);
}
WRITE_ONCE(rq->engine, owner);
owner->submit_request(rq);
active = NULL;
@ -1819,16 +1806,31 @@ static bool virtual_matches(const struct virtual_engine *ve,
return true;
}
static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
static void virtual_xfer_context(struct virtual_engine *ve,
struct intel_engine_cs *engine)
{
unsigned int n;
if (likely(engine == ve->siblings[0]))
return;
GEM_BUG_ON(READ_ONCE(ve->context.inflight));
if (!intel_engine_has_relative_mmio(engine))
virtual_update_register_offsets(ve->context.lrc_reg_state,
engine);
/*
* All the outstanding signals on ve->siblings[0] must have
* been completed, just pending the interrupt handler. As those
* signals still refer to the old sibling (via rq->engine), we must
* transfer those to the old irq_worker to keep our locking
* consistent.
* Move the bound engine to the top of the list for
* future execution. We then kick this tasklet first
* before checking others, so that we preferentially
* reuse this set of bound registers.
*/
intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
for (n = 1; n < ve->num_siblings; n++) {
if (ve->siblings[n] == engine) {
swap(ve->siblings[n], ve->siblings[0]);
break;
}
}
}
#define for_each_waiter(p__, rq__) \
@ -2060,6 +2062,14 @@ static inline void clear_ports(struct i915_request **ports, int count)
memset_p((void **)ports, NULL, count);
}
/*
 * Copy @count request pointers from @src into @dst, one slot at a time.
 *
 * Every slot is stored with WRITE_ONCE() to avoid write tearing of the
 * individual pointers. A memcpy_p() helper would be the natural fit here,
 * were one available.
 */
static inline void
copy_ports(struct i915_request **dst, struct i915_request **src, int count)
{
	for (; count--; dst++, src++)
		WRITE_ONCE(*dst, *src);
}
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@ -2271,38 +2281,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(!(rq->execution_mask & engine->mask));
WRITE_ONCE(rq->engine, engine);
if (engine != ve->siblings[0]) {
u32 *regs = ve->context.lrc_reg_state;
unsigned int n;
GEM_BUG_ON(READ_ONCE(ve->context.inflight));
if (!intel_engine_has_relative_mmio(engine))
virtual_update_register_offsets(regs,
engine);
if (!list_empty(&ve->context.signals))
virtual_xfer_breadcrumbs(ve);
/*
* Move the bound engine to the top of the list
* for future execution. We then kick this
* tasklet first before checking others, so that
* we preferentially reuse this set of bound
* registers.
*/
for (n = 1; n < ve->num_siblings; n++) {
if (ve->siblings[n] == engine) {
swap(ve->siblings[n],
ve->siblings[0]);
break;
}
}
GEM_BUG_ON(ve->siblings[0] != engine);
}
if (__i915_request_submit(rq)) {
/*
* Only after we confirm that we will submit
* this request (i.e. it has not already
* completed), do we want to update the context.
*
* This serves two purposes. It avoids
* unnecessary work if we are resubmitting an
* already completed request after timeslicing.
* But more importantly, it prevents us altering
* ve->siblings[] on an idle context, where
* we may be using ve->siblings[] in
* virtual_context_enter / virtual_context_exit.
*/
virtual_xfer_context(ve, engine);
GEM_BUG_ON(ve->siblings[0] != engine);
submit = true;
last = rq;
}
@ -2648,10 +2643,9 @@ static void process_csb(struct intel_engine_cs *engine)
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
memcpy(execlists->inflight,
execlists->pending,
execlists_num_ports(execlists) *
sizeof(*execlists->pending));
copy_ports(execlists->inflight,
execlists->pending,
execlists_num_ports(execlists));
smp_wmb(); /* complete the seqlock */
WRITE_ONCE(execlists->active, execlists->inflight);
@ -3309,7 +3303,10 @@ static void execlists_context_unpin(struct intel_context *ce)
{
check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
}
/*
 * post_unpin hook for execlists contexts: releases the mapping of the
 * context state object by calling i915_gem_object_unpin_map() on
 * ce->state->obj.
 */
static void execlists_context_post_unpin(struct intel_context *ce)
{
	i915_gem_object_unpin_map(ce->state->obj);
}
@ -3471,20 +3468,24 @@ __execlists_update_reg_state(const struct intel_context *ce,
}
static int
__execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine)
execlists_context_pre_pin(struct intel_context *ce,
struct i915_gem_ww_ctx *ww, void **vaddr)
{
void *vaddr;
GEM_BUG_ON(!ce->state);
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(engine->i915) |
*vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(ce->engine->i915) |
I915_MAP_OVERRIDE);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
return PTR_ERR_OR_ZERO(*vaddr);
}
static int
__execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine,
void *vaddr)
{
ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@ -3492,9 +3493,9 @@ __execlists_context_pin(struct intel_context *ce,
return 0;
}
static int execlists_context_pin(struct intel_context *ce)
static int execlists_context_pin(struct intel_context *ce, void *vaddr)
{
return __execlists_context_pin(ce, ce->engine);
return __execlists_context_pin(ce, ce->engine, vaddr);
}
static int execlists_context_alloc(struct intel_context *ce)
@ -3520,8 +3521,10 @@ static void execlists_context_reset(struct intel_context *ce)
static const struct intel_context_ops execlists_context_ops = {
.alloc = execlists_context_alloc,
.pre_pin = execlists_context_pre_pin,
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
.post_unpin = execlists_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@ -3885,7 +3888,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}
err = i915_ggtt_pin(vma, 0, PIN_HIGH);
err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (err)
goto err;
@ -4126,7 +4129,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
{
intel_mocs_init_engine(engine);
intel_engine_reset_breadcrumbs(engine);
intel_breadcrumbs_reset(engine->breadcrumbs);
if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
struct drm_printer p = drm_debug_printer(__func__);
@ -4757,14 +4760,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
intel_engine_mask_t aux_inv = 0;
u32 cmd, *cs;
cmd = 4;
if (mode & EMIT_INVALIDATE)
cmd += 2;
if (mode & EMIT_INVALIDATE)
aux_inv = request->engine->mask & ~BIT(BCS0);
if (aux_inv)
cmd += 2 * hweight8(aux_inv) + 2;
cs = intel_ring_begin(request,
4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
cs = intel_ring_begin(request, cmd);
if (IS_ERR(cs))
return PTR_ERR(cs);
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(true);
cmd = MI_FLUSH_DW + 1;
/* We always require a command barrier so that subsequent
@ -4797,6 +4807,10 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = MI_NOOP;
}
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
return 0;
@ -5295,6 +5309,14 @@ populate_lr_context(struct intel_context *ce,
return 0;
}
/*
 * Create the timeline for a context whose ce->timeline slot was pre-seeded.
 *
 * The slot is read and cleared in one step with fetch_and_zero(); the value
 * produced by page_unmask_bits() on it is then handed to
 * intel_timeline_create_from_engine() as the offset, together with
 * ce->engine.
 * NOTE(review): presumably the slot carries an offset rather than a real
 * timeline pointer at this point -- confirm against the code that seeds it.
 *
 * Returns the result of intel_timeline_create_from_engine() unchanged.
 */
static struct intel_timeline *pinned_timeline(struct intel_context *ce)
{
	struct intel_timeline *placeholder;

	placeholder = fetch_and_zero(&ce->timeline);

	return intel_timeline_create_from_engine(ce->engine,
						 page_unmask_bits(placeholder));
}
static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine)
{
@ -5325,19 +5347,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
goto error_deref_obj;
}
if (!ce->timeline) {
if (!page_mask_bits(ce->timeline)) {
struct intel_timeline *tl;
struct i915_vma *hwsp;
/*
* Use the static global HWSP for the kernel context, and
* a dynamically allocated cacheline for everyone else.
*/
hwsp = NULL;
if (unlikely(intel_context_is_barrier(ce)))
hwsp = engine->status_page.vma;
tl = intel_timeline_create(engine->gt, hwsp);
if (unlikely(ce->timeline))
tl = pinned_timeline(ce);
else
tl = intel_timeline_create(engine->gt);
if (IS_ERR(tl)) {
ret = PTR_ERR(tl);
goto error_deref_obj;
@ -5443,12 +5463,12 @@ static int virtual_context_alloc(struct intel_context *ce)
return __execlists_context_alloc(ce, ve->siblings[0]);
}
static int virtual_context_pin(struct intel_context *ce)
static int virtual_context_pin(struct intel_context *ce, void *vaddr)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
/* Note: we must use a real engine class for setting up reg state */
return __execlists_context_pin(ce, ve->siblings[0]);
return __execlists_context_pin(ce, ve->siblings[0], vaddr);
}
static void virtual_context_enter(struct intel_context *ce)
@ -5476,8 +5496,10 @@ static void virtual_context_exit(struct intel_context *ce)
static const struct intel_context_ops virtual_context_ops = {
.alloc = virtual_context_alloc,
.pre_pin = execlists_context_pre_pin,
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
.post_unpin = execlists_context_post_unpin,
.enter = virtual_context_enter,
.exit = virtual_context_exit,
@ -5711,9 +5733,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */
ve->base.cops = &virtual_context_ops;
ve->base.request_alloc = execlists_request_alloc;
@ -5730,6 +5750,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
intel_context_init(&ve->context, &ve->base);
ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
if (!ve->base.breadcrumbs) {
err = -ENOMEM;
goto err_put;
}
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];

View file

@ -18,7 +18,8 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
if (unlikely(!pt))
return ERR_PTR(-ENOMEM);
if (unlikely(setup_page_dma(vm, &pt->base))) {
pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(pt->base)) {
kfree(pt);
return ERR_PTR(-ENOMEM);
}
@ -27,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
return pt;
}
struct i915_page_directory *__alloc_pd(size_t sz)
struct i915_page_directory *__alloc_pd(int count)
{
struct i915_page_directory *pd;
pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
if (unlikely(!pd))
return NULL;
pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL);
if (unlikely(!pd->entry)) {
kfree(pd);
return NULL;
}
spin_lock_init(&pd->lock);
return pd;
}
@ -43,11 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;
pd = __alloc_pd(sizeof(*pd));
pd = __alloc_pd(I915_PDES);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
if (unlikely(setup_page_dma(vm, px_base(pd)))) {
pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(pd->pt.base)) {
kfree(pd->entry);
kfree(pd);
return ERR_PTR(-ENOMEM);
}
@ -55,41 +64,52 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
return pd;
}
void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl)
{
cleanup_page_dma(vm, pd);
kfree(pd);
BUILD_BUG_ON(offsetof(struct i915_page_directory, pt));
if (lvl) {
struct i915_page_directory *pd =
container_of(pt, typeof(*pd), pt);
kfree(pd->entry);
}
if (pt->base)
i915_gem_object_put(pt->base);
kfree(pt);
}
static inline void
write_dma_entry(struct i915_page_dma * const pdma,
write_dma_entry(struct drm_i915_gem_object * const pdma,
const unsigned short idx,
const u64 encoded_entry)
{
u64 * const vaddr = kmap_atomic(pdma->page);
u64 * const vaddr = kmap_atomic(__px_page(pdma));
vaddr[idx] = encoded_entry;
clflush_cache_range(&vaddr[idx], sizeof(u64));
kunmap_atomic(vaddr);
}
void
__set_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_dma * const to,
struct i915_page_table * const to,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
/* Each thread pre-pins the pd, and we may have a thread per pde. */
GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES);
atomic_inc(px_used(pd));
pd->entry[idx] = to;
write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
write_dma_entry(px_base(pd), idx, encode(px_dma(to), I915_CACHE_LLC));
}
void
clear_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
const struct i915_page_scratch * const scratch)
const struct drm_i915_gem_object * const scratch)
{
GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
@ -102,7 +122,7 @@ bool
release_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_table * const pt,
const struct i915_page_scratch * const scratch)
const struct drm_i915_gem_object * const scratch)
{
bool free = false;
@ -155,19 +175,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
return ppgtt;
}
int ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
u32 pte_flags;
int err;
if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
err = vm->allocate_va_range(vm, vma->node.start, vma->size);
if (err)
return err;
vm->allocate_va_range(vm, stash, vma->node.start, vma->size);
set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
}
@ -178,8 +195,6 @@ int ppgtt_bind_vma(struct i915_address_space *vm,
vm->insert_entries(vm, vma, cache_level, pte_flags);
wmb();
return 0;
}
void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@ -188,12 +203,93 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
vm->clear_range(vm, vma->node.start, vma->size);
}
/*
 * Number of entries, each spanning (1 << @shift), to reserve for a range of
 * length @size.
 *
 * The size is padded by twice (span - 1) before the shift so that a range
 * that later turns out to be misaligned to the span is still covered.
 */
static unsigned long pd_count(u64 size, int shift)
{
	const u64 slack = 2 * (BIT_ULL(shift) - 1);

	/* Beware later misalignment */
	return (size + slack) >> shift;
}
/*
 * Preallocate the page tables and page directories needed to cover @size
 * and chain them onto @stash.
 *
 * If vm->pd_shift is zero there is nothing to allocate and 0 is returned.
 * Otherwise pd_count(size, shift) page tables are allocated with alloc_pt()
 * and pushed onto the stash->pt[0] list (linked through pt->stash). Then,
 * for every level from 1 up to but excluding vm->top, the shift grows by
 * ilog2(I915_PDES) and pd_count(size, shift) page directories are allocated
 * with alloc_pd() and pushed onto the stash->pt[1] list.
 *
 * Returns 0 on success. On any allocation failure everything already held
 * in @stash is released via i915_vm_free_pt_stash() and the error carried
 * by the failed allocation is returned.
 */
int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
			   struct i915_vm_pt_stash *stash,
			   u64 size)
{
	unsigned long remaining;
	int shift = vm->pd_shift;
	int lvl, err;

	if (!shift)
		return 0;

	/* Leaf page tables go onto the level-0 list. */
	for (remaining = pd_count(size, shift); remaining; remaining--) {
		struct i915_page_table *pt = alloc_pt(vm);

		if (IS_ERR(pt)) {
			err = PTR_ERR(pt);
			goto err_free;
		}

		pt->stash = stash->pt[0];
		stash->pt[0] = pt;
	}

	/* Page directories of every higher level share the level-1 list. */
	for (lvl = 1; lvl < vm->top; lvl++) {
		shift += ilog2(I915_PDES); /* Each PD holds 512 entries */

		for (remaining = pd_count(size, shift); remaining; remaining--) {
			struct i915_page_directory *pd = alloc_pd(vm);

			if (IS_ERR(pd)) {
				err = PTR_ERR(pd);
				goto err_free;
			}

			pd->pt.stash = stash->pt[1];
			stash->pt[1] = &pd->pt;
		}
	}

	return 0;

err_free:
	i915_vm_free_pt_stash(vm, stash);
	return err;
}
/*
 * Pin the backing object of every entry held in @stash.
 *
 * Walks each stash->pt[] list in turn and calls pin_pt_dma() on the entry's
 * ->base. Stops at the first failure and returns that error; entries
 * visited before the failure are left as they are. Returns 0 once every
 * entry has been processed.
 */
int i915_vm_pin_pt_stash(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash)
{
	int n;

	for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
		struct i915_page_table *pt = stash->pt[n];

		while (pt) {
			int err = pin_pt_dma(vm, pt->base);

			if (err)
				return err;

			pt = pt->stash;
		}
	}

	return 0;
}
/*
 * Release every entry still held in @stash.
 *
 * Each stash->pt[] list is drained from its head: the entry is unlinked
 * first (the list head is advanced to pt->stash) and then handed to
 * free_px(), with the list index passed as the level argument. On return
 * every list head in @stash is NULL.
 */
void i915_vm_free_pt_stash(struct i915_address_space *vm,
			   struct i915_vm_pt_stash *stash)
{
	int lvl;

	for (lvl = 0; lvl < ARRAY_SIZE(stash->pt); lvl++) {
		struct i915_page_table *pt = stash->pt[lvl];

		while (pt) {
			/* Unlink before freeing so the head never dangles. */
			stash->pt[lvl] = pt->stash;
			free_px(vm, pt, lvl);

			pt = stash->pt[lvl];
		}
	}
}
int ppgtt_set_pages(struct i915_vma *vma)
{
GEM_BUG_ON(vma->pages);
vma->pages = vma->obj->mm.pages;
vma->page_sizes = vma->obj->mm.page_sizes;
return 0;

View file

@ -27,6 +27,7 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
#include "gt/intel_context.h"
#include "intel_ring.h"
static const struct intel_renderstate_rodata *
@ -157,33 +158,47 @@ out:
#undef OUT_BATCH
int intel_renderstate_init(struct intel_renderstate *so,
struct intel_engine_cs *engine)
struct intel_context *ce)
{
struct drm_i915_gem_object *obj;
struct intel_engine_cs *engine = ce->engine;
struct drm_i915_gem_object *obj = NULL;
int err;
memset(so, 0, sizeof(*so));
so->rodata = render_state_get_rodata(engine);
if (!so->rodata)
if (so->rodata) {
if (so->rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL;
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(so->vma)) {
err = PTR_ERR(so->vma);
goto err_obj;
}
}
i915_gem_ww_ctx_init(&so->ww, true);
retry:
err = intel_context_pin_ww(ce, &so->ww);
if (err)
goto err_fini;
/* return early if there's nothing to setup */
if (!err && !so->rodata)
return 0;
if (so->rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL;
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(so->vma)) {
err = PTR_ERR(so->vma);
goto err_obj;
}
err = i915_gem_object_lock(so->vma->obj, &so->ww);
if (err)
goto err_context;
err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err_obj;
goto err_context;
err = render_state_setup(so, engine->i915);
if (err)
@ -193,8 +208,18 @@ int intel_renderstate_init(struct intel_renderstate *so,
err_unpin:
i915_vma_unpin(so->vma);
err_context:
intel_context_unpin(ce);
err_fini:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&so->ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&so->ww);
err_obj:
i915_gem_object_put(obj);
if (obj)
i915_gem_object_put(obj);
so->vma = NULL;
return err;
}
@ -208,11 +233,9 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;
i915_vma_lock(so->vma);
err = i915_request_await_object(rq, so->vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(so->vma, rq, 0);
i915_vma_unlock(so->vma);
if (err)
return err;
@ -233,7 +256,17 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return 0;
}
void intel_renderstate_fini(struct intel_renderstate *so)
void intel_renderstate_fini(struct intel_renderstate *so,
struct intel_context *ce)
{
i915_vma_unpin_and_release(&so->vma, 0);
if (so->vma) {
i915_vma_unpin(so->vma);
i915_vma_close(so->vma);
}
intel_context_unpin(ce);
i915_gem_ww_ctx_fini(&so->ww);
if (so->vma)
i915_gem_object_put(so->vma->obj);
}

View file

@ -25,9 +25,10 @@
#define _INTEL_RENDERSTATE_H_
#include <linux/types.h>
#include "i915_gem.h"
struct i915_request;
struct intel_engine_cs;
struct intel_context;
struct i915_vma;
struct intel_renderstate_rodata {
@ -49,6 +50,7 @@ extern const struct intel_renderstate_rodata gen8_null_state;
extern const struct intel_renderstate_rodata gen9_null_state;
struct intel_renderstate {
struct i915_gem_ww_ctx ww;
const struct intel_renderstate_rodata *rodata;
struct i915_vma *vma;
u32 batch_offset;
@ -58,9 +60,10 @@ struct intel_renderstate {
};
int intel_renderstate_init(struct intel_renderstate *so,
struct intel_engine_cs *engine);
struct intel_context *ce);
int intel_renderstate_emit(struct intel_renderstate *so,
struct i915_request *rq);
void intel_renderstate_fini(struct intel_renderstate *so);
void intel_renderstate_fini(struct intel_renderstate *so,
struct intel_context *ce);
#endif /* _INTEL_RENDERSTATE_H_ */

View file

@ -15,6 +15,7 @@
#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "intel_breadcrumbs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"

View file

@ -21,7 +21,13 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
return space;
}
int intel_ring_pin(struct intel_ring *ring)
/*
 * Take one more pin on a ring that is already pinned.
 *
 * Asserts (GEM_BUG_ON) that ring->pin_count is non-zero and then increments
 * it. Nothing else is touched here; in particular no vma is pinned.
 */
void __intel_ring_pin(struct intel_ring *ring)
{
	GEM_BUG_ON(!atomic_read(&ring->pin_count));
	atomic_inc(&ring->pin_count);
}
int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
{
struct i915_vma *vma = ring->vma;
unsigned int flags;
@ -39,7 +45,7 @@ int intel_ring_pin(struct intel_ring *ring)
else
flags |= PIN_HIGH;
ret = i915_ggtt_pin(vma, 0, flags);
ret = i915_ggtt_pin(vma, ww, 0, flags);
if (unlikely(ret))
goto err_unpin;

View file

@ -21,7 +21,8 @@ int intel_ring_cacheline_align(struct i915_request *rq);
unsigned int intel_ring_update_space(struct intel_ring *ring);
int intel_ring_pin(struct intel_ring *ring);
void __intel_ring_pin(struct intel_ring *ring);
int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_reset(struct intel_ring *ring, u32 tail);

View file

@ -32,6 +32,7 @@
#include "gen6_ppgtt.h"
#include "gen7_renderclear.h"
#include "i915_drv.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_reset.h"
@ -201,16 +202,18 @@ static struct i915_address_space *vm_alias(struct i915_address_space *vm)
return vm;
}
/*
 * Return the pp_dir value stored in the gen6 ppgtt that owns @vm.
 *
 * @vm is converted to its i915_ppgtt with i915_vm_to_ppgtt() and then to the
 * gen6 wrapper with to_gen6_ppgtt(); the wrapper's pp_dir field is returned
 * as-is.
 */
static u32 pp_dir(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	return to_gen6_ppgtt(ppgtt)->pp_dir;
}
static void set_pp_dir(struct intel_engine_cs *engine)
{
struct i915_address_space *vm = vm_alias(engine->gt->vm);
if (vm) {
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
ENGINE_WRITE(engine, RING_PP_DIR_BASE,
px_base(ppgtt->pd)->ggtt_offset << 10);
ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm));
}
}
@ -255,7 +258,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
else
ring_setup_status_page(engine);
intel_engine_reset_breadcrumbs(engine);
intel_breadcrumbs_reset(engine->breadcrumbs);
/* Enforce ordering by reading HEAD register back */
ENGINE_POSTING_READ(engine, RING_HEAD);
@ -474,14 +477,16 @@ static void ring_context_destroy(struct kref *ref)
intel_context_free(ce);
}
static int __context_pin_ppgtt(struct intel_context *ce)
static int ring_context_pre_pin(struct intel_context *ce,
struct i915_gem_ww_ctx *ww,
void **unused)
{
struct i915_address_space *vm;
int err = 0;
vm = vm_alias(ce->vm);
if (vm)
err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
return err;
}
@ -496,6 +501,10 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
}
/*
 * unpin hook for ring contexts: intentionally empty. The ppgtt unpin is
 * performed from ring_context_post_unpin() instead.
 */
static void ring_context_unpin(struct intel_context *ce)
{
}
/*
 * post_unpin hook for ring contexts: forwards to __context_unpin_ppgtt()
 * for @ce.
 */
static void ring_context_post_unpin(struct intel_context *ce)
{
	__context_unpin_ppgtt(ce);
}
@ -584,9 +593,9 @@ static int ring_context_alloc(struct intel_context *ce)
return 0;
}
static int ring_context_pin(struct intel_context *ce)
static int ring_context_pin(struct intel_context *ce, void *unused)
{
return __context_pin_ppgtt(ce);
return 0;
}
static void ring_context_reset(struct intel_context *ce)
@ -597,8 +606,10 @@ static void ring_context_reset(struct intel_context *ce)
static const struct intel_context_ops ring_context_ops = {
.alloc = ring_context_alloc,
.pre_pin = ring_context_pre_pin,
.pin = ring_context_pin,
.unpin = ring_context_unpin,
.post_unpin = ring_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@ -608,7 +619,7 @@ static const struct intel_context_ops ring_context_ops = {
};
static int load_pd_dir(struct i915_request *rq,
const struct i915_ppgtt *ppgtt,
struct i915_address_space *vm,
u32 valid)
{
const struct intel_engine_cs * const engine = rq->engine;
@ -624,7 +635,7 @@ static int load_pd_dir(struct i915_request *rq,
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
*cs++ = pp_dir(vm);
/* Stall until the page table load is complete? */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
@ -826,7 +837,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
* post-sync op, this extra pass appears vital before a
* mm switch!
*/
ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
if (ret)
return ret;
@ -1250,14 +1261,15 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
return -ENODEV;
}
timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
timeline = intel_timeline_create_from_engine(engine,
I915_GEM_HWS_SEQNO_ADDR);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
goto err;
}
GEM_BUG_ON(timeline->has_initial_breadcrumb);
err = intel_timeline_pin(timeline);
err = intel_timeline_pin(timeline, NULL);
if (err)
goto err_timeline;
@ -1267,7 +1279,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
goto err_timeline_unpin;
}
err = intel_ring_pin(ring);
err = intel_ring_pin(ring, NULL);
if (err)
goto err_ring;

View file

@ -7,6 +7,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "intel_breadcrumbs.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_irq.h"

View file

@ -215,7 +215,8 @@ static void cacheline_free(struct intel_timeline_cacheline *cl)
static int intel_timeline_init(struct intel_timeline *timeline,
struct intel_gt *gt,
struct i915_vma *hwsp)
struct i915_vma *hwsp,
unsigned int offset)
{
void *vaddr;
@ -246,8 +247,7 @@ static int intel_timeline_init(struct intel_timeline *timeline,
vaddr = page_mask_bits(cl->vaddr);
} else {
timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
timeline->hwsp_offset = offset;
vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@ -297,7 +297,9 @@ static void intel_timeline_fini(struct intel_timeline *timeline)
}
struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
__intel_timeline_create(struct intel_gt *gt,
struct i915_vma *global_hwsp,
unsigned int offset)
{
struct intel_timeline *timeline;
int err;
@ -306,7 +308,7 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
if (!timeline)
return ERR_PTR(-ENOMEM);
err = intel_timeline_init(timeline, gt, global_hwsp);
err = intel_timeline_init(timeline, gt, global_hwsp, offset);
if (err) {
kfree(timeline);
return ERR_PTR(err);
@ -315,14 +317,20 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
return timeline;
}
int intel_timeline_pin(struct intel_timeline *tl)
/*
 * Take one more pin on a timeline that is already pinned.
 *
 * Asserts (GEM_BUG_ON) that tl->pin_count is non-zero and then increments
 * it; no other state is touched here.
 */
void __intel_timeline_pin(struct intel_timeline *tl)
{
	GEM_BUG_ON(!atomic_read(&tl->pin_count));
	atomic_inc(&tl->pin_count);
}
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
{
int err;
if (atomic_add_unless(&tl->pin_count, 1, 0))
return 0;
err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
if (err)
return err;
@ -465,7 +473,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
goto err_rollback;
}
err = i915_ggtt_pin(vma, 0, PIN_HIGH);
err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (err) {
__idle_hwsp_free(vma->private, cacheline);
goto err_rollback;
@ -484,7 +492,9 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
* free it after the current request is retired, which ensures that
* all writes into the cacheline from previous requests are complete.
*/
err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
err = i915_active_ref(&tl->hwsp_cacheline->active,
tl->fence_context,
&rq->fence);
if (err)
goto err_cacheline;

View file

@ -29,10 +29,27 @@
#include "i915_active.h"
#include "i915_syncmap.h"
#include "gt/intel_timeline_types.h"
#include "intel_timeline_types.h"
struct intel_timeline *
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
__intel_timeline_create(struct intel_gt *gt,
struct i915_vma *global_hwsp,
unsigned int offset);
/*
 * Create a timeline for @gt without a caller-supplied status page: wraps
 * __intel_timeline_create() with a NULL global_hwsp and an offset of 0.
 *
 * Returns whatever __intel_timeline_create() returns.
 */
static inline struct intel_timeline *
intel_timeline_create(struct intel_gt *gt)
{
	return __intel_timeline_create(gt, NULL, 0);
}
/*
 * Create a timeline that uses @engine's status page: wraps
 * __intel_timeline_create() with engine->gt, engine->status_page.vma as the
 * global_hwsp, and the caller's @offset.
 *
 * Returns whatever __intel_timeline_create() returns.
 */
static inline struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
				  unsigned int offset)
{
	return __intel_timeline_create(engine->gt,
				       engine->status_page.vma,
				       offset);
}
static inline struct intel_timeline *
intel_timeline_get(struct intel_timeline *timeline)
@ -71,7 +88,8 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
}
int intel_timeline_pin(struct intel_timeline *tl);
void __intel_timeline_pin(struct intel_timeline *tl);
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww);
void intel_timeline_enter(struct intel_timeline *tl);
int intel_timeline_get_seqno(struct intel_timeline *tl,
struct i915_request *rq,

View file

@ -2088,6 +2088,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
const struct i915_wa *wa;
struct i915_request *rq;
struct i915_vma *vma;
struct i915_gem_ww_ctx ww;
unsigned int i;
u32 *results;
int err;
@ -2100,29 +2101,34 @@ static int engine_wa_list_verify(struct intel_context *ce,
return PTR_ERR(vma);
intel_engine_pm_get(ce->engine);
rq = intel_context_create_request(ce);
intel_engine_pm_put(ce->engine);
i915_gem_ww_ctx_init(&ww, false);
retry:
err = i915_gem_object_lock(vma->obj, &ww);
if (err == 0)
err = intel_context_pin_ww(ce, &ww);
if (err)
goto err_pm;
rq = i915_request_create(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_vma;
goto err_unpin;
}
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, true);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (err) {
i915_request_add(rq);
goto err_vma;
}
err = wa_list_srm(rq, wal, vma);
if (err)
goto err_vma;
if (err == 0)
err = wa_list_srm(rq, wal, vma);
i915_request_get(rq);
if (err)
i915_request_set_error_once(rq, err);
i915_request_add(rq);
if (err)
goto err_rq;
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
goto err_rq;
@ -2147,7 +2153,16 @@ static int engine_wa_list_verify(struct intel_context *ce,
err_rq:
i915_request_put(rq);
err_vma:
err_unpin:
intel_context_unpin(ce);
err_pm:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
intel_engine_pm_put(ce->engine);
i915_vma_unpin(vma);
i915_vma_put(vma);
return err;

View file

@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce)
{
}
/* Mock context post_unpin hook: intentionally does nothing. */
static void mock_context_post_unpin(struct intel_context *ce)
{
}
static void mock_context_destroy(struct kref *ref)
{
struct intel_context *ce = container_of(ref, typeof(*ce), ref);
@ -152,8 +156,7 @@ static int mock_context_alloc(struct intel_context *ce)
if (!ce->ring)
return -ENOMEM;
GEM_BUG_ON(ce->timeline);
ce->timeline = intel_timeline_create(ce->engine->gt, NULL);
ce->timeline = intel_timeline_create(ce->engine->gt);
if (IS_ERR(ce->timeline)) {
kfree(ce->engine);
return PTR_ERR(ce->timeline);
@ -164,7 +167,13 @@ static int mock_context_alloc(struct intel_context *ce)
return 0;
}
static int mock_context_pin(struct intel_context *ce)
/*
 * Mock context pre_pin hook: ignores all of its arguments and always
 * reports success (0).
 */
static int mock_context_pre_pin(struct intel_context *ce,
struct i915_gem_ww_ctx *ww, void **unused)
{
return 0;
}
/*
 * Mock context pin hook: ignores its arguments and always reports
 * success (0).
 */
static int mock_context_pin(struct intel_context *ce, void *unused)
{
return 0;
}
@ -176,8 +185,10 @@ static void mock_context_reset(struct intel_context *ce)
static const struct intel_context_ops mock_context_ops = {
.alloc = mock_context_alloc,
.pre_pin = mock_context_pre_pin,
.pin = mock_context_pin,
.unpin = mock_context_unpin,
.post_unpin = mock_context_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@ -261,11 +272,12 @@ static void mock_engine_release(struct intel_engine_cs *engine)
GEM_BUG_ON(timer_pending(&mock->hw_delay));
intel_breadcrumbs_free(engine->breadcrumbs);
intel_context_unpin(engine->kernel_context);
intel_context_put(engine->kernel_context);
intel_engine_fini_retire(engine);
intel_engine_fini_breadcrumbs(engine);
}
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@ -323,20 +335,26 @@ int mock_engine_init(struct intel_engine_cs *engine)
struct intel_context *ce;
intel_engine_init_active(engine, ENGINE_MOCK);
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
intel_engine_init_retire(engine);
engine->breadcrumbs = intel_breadcrumbs_create(NULL);
if (!engine->breadcrumbs)
return -ENOMEM;
ce = create_kernel_context(engine);
if (IS_ERR(ce))
goto err_breadcrumbs;
/* We insist the kernel context is using the status_page */
engine->status_page.vma = ce->timeline->hwsp_ggtt;
engine->kernel_context = ce;
return 0;
err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine);
intel_breadcrumbs_free(engine->breadcrumbs);
return -ENOMEM;
}

View file

@ -68,6 +68,8 @@ static int context_sync(struct intel_context *ce)
} while (!err);
mutex_unlock(&tl->mutex);
/* Wait for all barriers to complete (remote CPU) before we check */
i915_active_unlock_wait(&ce->active);
return err;
}

View file

@ -2729,7 +2729,7 @@ static int create_gang(struct intel_engine_cs *engine,
i915_gem_object_put(obj);
intel_context_put(ce);
rq->client_link.next = &(*prev)->client_link;
rq->mock.link.next = &(*prev)->mock.link;
*prev = rq;
return 0;
@ -2970,8 +2970,7 @@ static int live_preempt_gang(void *arg)
}
while (rq) { /* wait for each rq from highest to lowest prio */
struct i915_request *n =
list_next_entry(rq, client_link);
struct i915_request *n = list_next_entry(rq, mock.link);
if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
@ -3090,7 +3089,7 @@ static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
return vma;
}
err = i915_ggtt_pin(vma, 0, 0);
err = i915_ggtt_pin(vma, NULL, 0, 0);
if (err) {
i915_vma_put(vma);
return ERR_PTR(err);
@ -4997,6 +4996,7 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
{
struct intel_context *ce;
struct i915_request *rq;
struct i915_gem_ww_ctx ww;
enum {
RING_START_IDX = 0,
RING_TAIL_IDX,
@ -5011,7 +5011,11 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
if (IS_ERR(ce))
return PTR_ERR(ce);
err = intel_context_pin(ce);
i915_gem_ww_ctx_init(&ww, false);
retry:
err = i915_gem_object_lock(scratch->obj, &ww);
if (!err)
err = intel_context_pin_ww(ce, &ww);
if (err)
goto err_put;
@ -5040,11 +5044,9 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
*cs++ = 0;
i915_vma_lock(scratch);
err = i915_request_await_object(rq, scratch->obj, true);
if (!err)
err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(scratch);
i915_request_get(rq);
i915_request_add(rq);
@ -5081,6 +5083,12 @@ err_rq:
err_unpin:
intel_context_unpin(ce);
err_put:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
intel_context_put(ce);
return err;
}

View file

@ -77,20 +77,20 @@ create_spin_counter(struct intel_engine_cs *engine,
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
return vma;
err = PTR_ERR(vma);
goto err_put;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err) {
i915_vma_put(vma);
return ERR_PTR(err);
}
if (err)
goto err_unlock;
i915_vma_lock(vma);
base = i915_gem_object_pin_map(obj, I915_MAP_WC);
if (IS_ERR(base)) {
i915_gem_object_put(obj);
return ERR_CAST(base);
err = PTR_ERR(base);
goto err_unpin;
}
cs = base;
@ -134,6 +134,14 @@ create_spin_counter(struct intel_engine_cs *engine,
*cancel = base + loop;
*counter = srm ? memset32(base + end, 0, 1) : NULL;
return vma;
err_unpin:
i915_vma_unpin(vma);
err_unlock:
i915_vma_unlock(vma);
err_put:
i915_gem_object_put(obj);
return ERR_PTR(err);
}
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
@ -639,7 +647,6 @@ int live_rps_frequency_cs(void *arg)
goto err_vma;
}
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
@ -647,7 +654,6 @@ int live_rps_frequency_cs(void *arg)
err = rq->engine->emit_bb_start(rq,
vma->node.start,
PAGE_SIZE, 0);
i915_vma_unlock(vma);
i915_request_add(rq);
if (err)
goto err_vma;
@ -708,6 +714,7 @@ err_vma:
i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
i915_vma_unlock(vma);
i915_vma_put(vma);
st_engine_heartbeat_enable(engine);
@ -781,7 +788,6 @@ int live_rps_frequency_srm(void *arg)
goto err_vma;
}
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
@ -789,7 +795,6 @@ int live_rps_frequency_srm(void *arg)
err = rq->engine->emit_bb_start(rq,
vma->node.start,
PAGE_SIZE, 0);
i915_vma_unlock(vma);
i915_request_add(rq);
if (err)
goto err_vma;
@ -849,6 +854,7 @@ err_vma:
i915_gem_object_flush_map(vma->obj);
i915_gem_object_unpin_map(vma->obj);
i915_vma_unpin(vma);
i915_vma_unlock(vma);
i915_vma_put(vma);
st_engine_heartbeat_enable(engine);

View file

@ -72,7 +72,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
unsigned long cacheline;
int err;
tl = intel_timeline_create(state->gt, NULL);
tl = intel_timeline_create(state->gt);
if (IS_ERR(tl))
return PTR_ERR(tl);
@ -455,7 +455,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
struct i915_request *rq;
int err;
err = intel_timeline_pin(tl);
err = intel_timeline_pin(tl, NULL);
if (err) {
rq = ERR_PTR(err);
goto out;
@ -487,7 +487,7 @@ checked_intel_timeline_create(struct intel_gt *gt)
{
struct intel_timeline *tl;
tl = intel_timeline_create(gt, NULL);
tl = intel_timeline_create(gt);
if (IS_ERR(tl))
return tl;
@ -660,14 +660,14 @@ static int live_hwsp_wrap(void *arg)
* foreign GPU references.
*/
tl = intel_timeline_create(gt, NULL);
tl = intel_timeline_create(gt);
if (IS_ERR(tl))
return PTR_ERR(tl);
if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
goto out_free;
err = intel_timeline_pin(tl);
err = intel_timeline_pin(tl, NULL);
if (err)
goto out_free;

View file

@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
return PTR_ERR(results);
err = 0;
i915_gem_object_lock(results);
i915_gem_object_lock(results, NULL);
intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
err = i915_gem_object_set_to_cpu_domain(results, false);
i915_gem_object_unlock(results);

View file

@ -677,7 +677,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
goto err;
flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
ret = i915_ggtt_pin(vma, 0, flags);
ret = i915_ggtt_pin(vma, NULL, 0, flags);
if (ret) {
vma = ERR_PTR(ret);
goto err;

View file

@ -1923,6 +1923,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
if (ret)
goto err_unmap;
i915_gem_object_unlock(bb->obj);
INIT_LIST_HEAD(&bb->list);
list_add(&bb->list, &s->workload->shadow_bb);
@ -2982,7 +2983,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
goto put_obj;
}
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
ret = i915_gem_object_set_to_cpu_domain(obj, false);
i915_gem_object_unlock(obj);
if (ret) {

View file

@ -403,6 +403,14 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
wa_ctx->indirect_ctx.shadow_va = NULL;
}
/*
 * Overwrite the DMA address stored in the scatterlist (mm.pages->sgl) of the
 * object backing @pd with @addr.
 */
static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr)
{
	/* This is not a good idea */
	pd->pt.base->mm.pages->sgl->dma_address = addr;
}
static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
struct intel_context *ce)
{
@ -411,7 +419,7 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
int i = 0;
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0];
set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
} else {
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
struct i915_page_directory * const pd =
@ -421,7 +429,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
shadow ppgtt. */
if (!pd)
break;
px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]);
}
}
}
@ -1240,13 +1249,13 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
int i;
if (i915_vm_is_4lvl(&ppgtt->vm)) {
px_dma(ppgtt->pd) = s->i915_context_pml4;
set_dma_address(ppgtt->pd, s->i915_context_pml4);
} else {
for (i = 0; i < GEN8_3LVL_PDPES; i++) {
struct i915_page_directory * const pd =
i915_pd_entry(ppgtt->pd, i);
px_dma(pd) = s->i915_context_pdps[i];
set_dma_address(pd, s->i915_context_pdps[i]);
}
}
}

View file

@ -28,12 +28,14 @@ static struct i915_global_active {
} global;
struct active_node {
struct rb_node node;
struct i915_active_fence base;
struct i915_active *ref;
struct rb_node node;
u64 timeline;
};
#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)
static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
@ -128,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { }
static void
__active_retire(struct i915_active *ref)
{
struct rb_root root = RB_ROOT;
struct active_node *it, *n;
struct rb_root root;
unsigned long flags;
GEM_BUG_ON(i915_active_is_idle(ref));
@ -141,9 +143,25 @@ __active_retire(struct i915_active *ref)
GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
debug_active_deactivate(ref);
root = ref->tree;
ref->tree = RB_ROOT;
ref->cache = NULL;
/* Even if we have not used the cache, we may still have a barrier */
if (!ref->cache)
ref->cache = fetch_node(ref->tree.rb_node);
/* Keep the MRU cached node for reuse */
if (ref->cache) {
/* Discard all other nodes in the tree */
rb_erase(&ref->cache->node, &ref->tree);
root = ref->tree;
/* Rebuild the tree with only the cached node */
rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
rb_insert_color(&ref->cache->node, &ref->tree);
GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
/* Make the cached node available for reuse with any timeline */
if (IS_ENABLED(CONFIG_64BIT))
ref->cache->timeline = 0; /* needs cmpxchg(u64) */
}
spin_unlock_irqrestore(&ref->tree_lock, flags);
@ -154,6 +172,7 @@ __active_retire(struct i915_active *ref)
/* ... except if you wait on it, you must manage your own references! */
wake_up_var(ref);
/* Finally free the discarded timeline tree */
rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
GEM_BUG_ON(i915_active_fence_isset(&it->base));
kmem_cache_free(global.slab_cache, it);
@ -216,12 +235,11 @@ excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
active_retire(container_of(cb, struct i915_active, excl.cb));
}
static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
{
struct active_node *node, *prealloc;
struct rb_node **p, *parent;
u64 idx = tl->fence_context;
struct active_node *it;
GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */
/*
* We track the most recently used timeline to skip a rbtree search
@ -230,8 +248,59 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
* after the previous activity has been retired, or if it matches the
* current timeline.
*/
node = READ_ONCE(ref->cache);
if (node && node->timeline == idx)
it = READ_ONCE(ref->cache);
if (it) {
u64 cached = READ_ONCE(it->timeline);
/* Once claimed, this slot will only belong to this idx */
if (cached == idx)
return it;
#ifdef CONFIG_64BIT /* for cmpxchg(u64) */
/*
* An unclaimed cache [.timeline=0] can only be claimed once.
*
* If the value is already non-zero, some other thread has
* claimed the cache and we know that it does not match our
* idx. If, and only if, the timeline is currently zero is it
* worth competing to claim it atomically for ourselves (for
* only the winner of that race will cmpxchg return the old
* value of 0).
*/
if (!cached && !cmpxchg(&it->timeline, 0, idx))
return it;
#endif
}
BUILD_BUG_ON(offsetof(typeof(*it), node));
/* While active, the tree can only be built; not destroyed */
GEM_BUG_ON(i915_active_is_idle(ref));
it = fetch_node(ref->tree.rb_node);
while (it) {
if (it->timeline < idx) {
it = fetch_node(it->node.rb_right);
} else if (it->timeline > idx) {
it = fetch_node(it->node.rb_left);
} else {
WRITE_ONCE(ref->cache, it);
break;
}
}
/* NB: If the tree rotated beneath us, we may miss our target. */
return it;
}
static struct i915_active_fence *
active_instance(struct i915_active *ref, u64 idx)
{
struct active_node *node, *prealloc;
struct rb_node **p, *parent;
node = __active_lookup(ref, idx);
if (likely(node))
return &node->base;
/* Preallocate a replacement, just in case */
@ -268,10 +337,9 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
rb_insert_color(&node->node, &ref->tree);
out:
ref->cache = node;
WRITE_ONCE(ref->cache, node);
spin_unlock_irq(&ref->tree_lock);
BUILD_BUG_ON(offsetof(typeof(*node), base));
return &node->base;
}
@ -353,69 +421,116 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
return ____active_del_barrier(ref, node, barrier_to_engine(node));
}
int i915_active_ref(struct i915_active *ref,
struct intel_timeline *tl,
struct dma_fence *fence)
/*
 * If @active is currently acting as a barrier, remove that barrier from
 * @ref so the slot can be reused by the caller.
 *
 * Returns true if a barrier was removed, false if @active was not a barrier.
 */
static bool
replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
{
	/* Is this a proto-node used by our idle barrier? */
	if (is_barrier(active)) {
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		return true;
	}

	return false;
}
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
struct i915_active_fence *active;
int err;
lockdep_assert_held(&tl->mutex);
/* Prevent reaping in case we malloc/wait while building the tree */
err = i915_active_acquire(ref);
if (err)
return err;
active = active_instance(ref, tl);
active = active_instance(ref, idx);
if (!active) {
err = -ENOMEM;
goto out;
}
if (is_barrier(active)) { /* proto-node used by our idle barrier */
/*
* This request is on the kernel_context timeline, and so
* we can use it to substitute for the pending idle-barrer
* request that we want to emit on the kernel_context.
*/
__active_del_barrier(ref, node_from_active(active));
if (replace_barrier(ref, active)) {
RCU_INIT_POINTER(active->fence, NULL);
atomic_dec(&ref->count);
}
if (!__i915_active_fence_set(active, fence))
atomic_inc(&ref->count);
__i915_active_acquire(ref);
out:
i915_active_release(ref);
return err;
}
struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
static struct dma_fence *
__i915_active_set_fence(struct i915_active *ref,
struct i915_active_fence *active,
struct dma_fence *fence)
{
struct dma_fence *prev;
/* We expect the caller to manage the exclusive timeline ordering */
GEM_BUG_ON(i915_active_is_idle(ref));
if (replace_barrier(ref, active)) {
RCU_INIT_POINTER(active->fence, fence);
return NULL;
}
rcu_read_lock();
prev = __i915_active_fence_set(&ref->excl, f);
prev = __i915_active_fence_set(active, fence);
if (prev)
prev = dma_fence_get_rcu(prev);
else
atomic_inc(&ref->count);
__i915_active_acquire(ref);
rcu_read_unlock();
return prev;
}
/*
 * Find the fence slot tracking timeline @idx within @ref.
 *
 * This never allocates: the slot is expected to exist already, which is
 * enforced by the GEM_BUG_ON() below.
 */
static struct i915_active_fence *
__active_fence(struct i915_active *ref, u64 idx)
{
struct active_node *it;
/* First attempt is made without taking tree_lock */
it = __active_lookup(ref, idx);
if (unlikely(!it)) { /* Contention with parallel tree builders! */
/* Repeat the lookup, this time holding tree_lock */
spin_lock_irq(&ref->tree_lock);
it = __active_lookup(ref, idx);
spin_unlock_irq(&ref->tree_lock);
}
GEM_BUG_ON(!it); /* slot must be preallocated */
return &it->base;
}
/*
 * Install @fence into the already-existing slot for timeline @idx of @ref.
 *
 * Returns the result of __i915_active_set_fence() for that slot.
 */
struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
	struct i915_active_fence *slot;

	/* Only valid while active, see i915_active_acquire_for_context() */
	slot = __active_fence(ref, idx);

	return __i915_active_set_fence(ref, slot, fence);
}
/*
 * Install @f into the exclusive slot (ref->excl) of @ref.
 *
 * Returns the result of __i915_active_set_fence() for that slot.
 */
struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	struct i915_active_fence *excl = &ref->excl;

	/* We expect the caller to manage the exclusive timeline ordering */
	return __i915_active_set_fence(ref, excl, f);
}
/*
 * Take an additional reference on @ref, but only if it is already in use.
 *
 * Returns true if ref->count was non-zero and has been incremented, false
 * if the count was zero (in which case it is left untouched).
 */
bool i915_active_acquire_if_busy(struct i915_active *ref)
{
debug_active_assert(ref);
/* Add 1 to the count unless it currently equals 0 */
return atomic_add_unless(&ref->count, 1, 0);
}
/*
 * Take a reference on @ref under tree_lock, calling
 * debug_active_activate() if the count was zero beforehand.
 */
static void __i915_active_activate(struct i915_active *ref)
{
spin_lock_irq(&ref->tree_lock); /* __active_retire() */
/* atomic_fetch_inc() returns the old value: 0 means this is the first ref */
if (!atomic_fetch_inc(&ref->count))
debug_active_activate(ref);
spin_unlock_irq(&ref->tree_lock);
}
int i915_active_acquire(struct i915_active *ref)
{
int err;
@ -423,19 +538,19 @@ int i915_active_acquire(struct i915_active *ref)
if (i915_active_acquire_if_busy(ref))
return 0;
if (!ref->active) {
__i915_active_activate(ref);
return 0;
}
err = mutex_lock_interruptible(&ref->mutex);
if (err)
return err;
if (likely(!i915_active_acquire_if_busy(ref))) {
if (ref->active)
err = ref->active(ref);
if (!err) {
spin_lock_irq(&ref->tree_lock); /* __active_retire() */
debug_active_activate(ref);
atomic_inc(&ref->count);
spin_unlock_irq(&ref->tree_lock);
}
err = ref->active(ref);
if (!err)
__i915_active_activate(ref);
}
mutex_unlock(&ref->mutex);
@ -443,6 +558,24 @@ int i915_active_acquire(struct i915_active *ref)
return err;
}
/*
 * Acquire @ref and make sure a slot for timeline @idx exists in it.
 *
 * Returns 0 on success, with the reference taken by i915_active_acquire()
 * still held by the caller. On failure returns the error from
 * i915_active_acquire(), or -ENOMEM if no slot could be obtained (the
 * reference is released again in that case).
 */
int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
{
	int err;

	err = i915_active_acquire(ref);
	if (err)
		return err;

	if (!active_instance(ref, idx)) {
		i915_active_release(ref);
		return -ENOMEM;
	}

	return 0; /* return with active ref */
}
void i915_active_release(struct i915_active *ref)
{
debug_active_assert(ref);
@ -651,16 +784,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
return await_active(ref, flags, sw_await_fence, fence, fence);
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
debug_active_fini(ref);
GEM_BUG_ON(atomic_read(&ref->count));
GEM_BUG_ON(work_pending(&ref->work));
GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
mutex_destroy(&ref->mutex);
if (ref->cache)
kmem_cache_free(global.slab_cache, ref->cache);
}
#endif
static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
@ -674,7 +807,6 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
if (RB_EMPTY_ROOT(&ref->tree))
return NULL;
spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));
/*
@ -700,9 +832,9 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
prev = p;
if (node->timeline < idx)
p = p->rb_right;
p = READ_ONCE(p->rb_right);
else
p = p->rb_left;
p = READ_ONCE(p->rb_left);
}
/*
@ -739,14 +871,13 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
goto match;
}
spin_unlock_irq(&ref->tree_lock);
return NULL;
match:
spin_lock_irq(&ref->tree_lock);
rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
if (p == &ref->cache->node)
ref->cache = NULL;
WRITE_ONCE(ref->cache, NULL);
spin_unlock_irq(&ref->tree_lock);
return rb_entry(p, struct active_node, node);
@ -777,7 +908,9 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct llist_node *prev = first;
struct active_node *node;
rcu_read_lock();
node = reuse_idle_barrier(ref, idx);
rcu_read_unlock();
if (!node) {
node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
if (!node)
@ -801,7 +934,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
*/
RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
node->base.cb.node.prev = (void *)engine;
atomic_inc(&ref->count);
__i915_active_acquire(ref);
}
GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));

View file

@ -163,14 +163,16 @@ void __i915_active_init(struct i915_active *ref,
__i915_active_init(ref, active, retire, &__mkey, &__wkey); \
} while (0)
int i915_active_ref(struct i915_active *ref,
struct intel_timeline *tl,
struct dma_fence *fence);
struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
static inline int
i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
return i915_active_ref(ref,
i915_request_timeline(rq)->fence_context,
&rq->fence);
}
struct dma_fence *
@ -198,7 +200,9 @@ int i915_request_await_active(struct i915_request *rq,
#define I915_ACTIVE_AWAIT_BARRIER BIT(2)
int i915_active_acquire(struct i915_active *ref);
int i915_active_acquire_for_context(struct i915_active *ref, u64 idx);
bool i915_active_acquire_if_busy(struct i915_active *ref);
void i915_active_release(struct i915_active *ref);
static inline void __i915_active_acquire(struct i915_active *ref)
@ -213,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref)
return !atomic_read(&ref->count);
}
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
#else
static inline void i915_active_fini(struct i915_active *ref) { }
#endif
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct intel_engine_cs *engine);
@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void);
struct i915_active *i915_active_get(struct i915_active *ref);
void i915_active_put(struct i915_active *ref);
/*
 * Make @rq wait upon the fence currently held in the exclusive slot of
 * @active, if there is one.
 *
 * Returns 0 if there is no exclusive fence, otherwise the result of
 * i915_request_await_dma_fence().
 */
static inline int __i915_request_await_exclusive(struct i915_request *rq,
						 struct i915_active *active)
{
	struct dma_fence *excl = i915_active_fence_get(&active->excl);
	int ret;

	if (!excl)
		return 0;

	ret = i915_request_await_dma_fence(rq, excl);
	/* Drop the reference obtained by i915_active_fence_get() */
	dma_fence_put(excl);

	return ret;
}
#endif /* _I915_ACTIVE_H_ */

View file

@ -1075,6 +1075,7 @@ static void i915_driver_release(struct drm_device *dev)
intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_release(dev_priv);
i915_gem_drain_freed_objects(dev_priv);
i915_driver_mmio_release(dev_priv);
@ -1119,7 +1120,6 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
struct drm_i915_file_private *file_priv = file->driver_priv;
i915_gem_context_close(file);
i915_gem_release(dev, file);
kfree_rcu(file_priv, rcu);

View file

@ -203,11 +203,6 @@ struct drm_i915_file_private {
struct rcu_head rcu;
};
struct {
spinlock_t lock;
struct list_head request_list;
} mm;
struct xarray context_xa;
struct xarray vm_xa;
@ -592,11 +587,6 @@ struct i915_gem_mm {
*/
atomic_t free_count;
/**
* Small stash of WC pages
*/
struct pagestash wc_stash;
/**
* tmpfs instance used for shmem backed objects
*/
@ -1826,11 +1816,18 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915)
}
struct i915_vma * __must_check
i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags);
static inline struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size,
u64 alignment,
u64 flags);
u64 size, u64 alignment, u64 flags)
{
return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags);
}
int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
@ -1867,7 +1864,6 @@ void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
void i915_gem_resume(struct drm_i915_private *dev_priv);
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
void i915_gem_release(struct drm_device *dev, struct drm_file *file);
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level);

View file

@ -335,12 +335,20 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
u64 remain;
int ret;
ret = i915_gem_object_prepare_read(obj, &needs_clflush);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;
ret = i915_gem_object_prepare_read(obj, &needs_clflush);
if (ret) {
i915_gem_object_unlock(obj);
return ret;
}
fence = i915_gem_object_lock_fence(obj);
i915_gem_object_finish_access(obj);
i915_gem_object_unlock(obj);
if (!fence)
return -ENOMEM;
@ -420,7 +428,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}
ret = i915_gem_object_lock_interruptible(obj);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
goto out_unpin;
@ -619,7 +627,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}
ret = i915_gem_object_lock_interruptible(obj);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
goto out_unpin;
@ -734,12 +742,20 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
u64 remain;
int ret;
ret = i915_gem_object_prepare_write(obj, &needs_clflush);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;
ret = i915_gem_object_prepare_write(obj, &needs_clflush);
if (ret) {
i915_gem_object_unlock(obj);
return ret;
}
fence = i915_gem_object_lock_fence(obj);
i915_gem_object_finish_access(obj);
i915_gem_object_unlock(obj);
if (!fence)
return -ENOMEM;
@ -946,11 +962,10 @@ static void discard_ggtt_vma(struct i915_vma *vma)
}
struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size,
u64 alignment,
u64 flags)
i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &i915->ggtt;
@ -1016,7 +1031,7 @@ new_vma:
return ERR_PTR(ret);
}
ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
if (ret)
return ERR_PTR(ret);
@ -1290,7 +1305,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
i915_gem_drain_freed_objects(i915);
list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
drm_WARN_ON(&i915->drm,
i915_gem_object_set_to_cpu_domain(obj, true));
i915_gem_object_unlock(obj);
@ -1301,21 +1316,6 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
return 0;
}
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
struct i915_request *request;
/* Clean up our request list when the client is going away, so that
* later retire_requests won't dereference our soon-to-be-gone
* file_priv.
*/
spin_lock(&file_priv->mm.lock);
list_for_each_entry(request, &file_priv->mm.request_list, client_link)
request->file_priv = NULL;
spin_unlock(&file_priv->mm.lock);
}
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
struct drm_i915_file_private *file_priv;
@ -1331,9 +1331,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
file_priv->dev_priv = i915;
file_priv->file = file;
spin_lock_init(&file_priv->mm.lock);
INIT_LIST_HEAD(&file_priv->mm.request_list);
file_priv->bsd_engine = -1;
file_priv->hang_timestamp = jiffies;
@ -1344,6 +1341,58 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
return ret;
}
/*
 * Prepare @ww for use: an empty object list, no contended object, and an
 * acquire context of class reservation_ww_class.
 *
 * @intr is remembered and later selects the interruptible slow-path lock
 * in i915_gem_ww_ctx_backoff().
 */
void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
{
	ww->contended = NULL;
	ww->intr = intr;
	INIT_LIST_HEAD(&ww->obj_list);

	ww_acquire_init(&ww->ctx, &reservation_ww_class);
}
/*
 * Drain ww->obj_list: each object is removed from the list and unlocked,
 * starting from the head, until the list is empty.
 */
static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj;

	for (;;) {
		obj = list_first_entry_or_null(&ww->obj_list,
					       struct drm_i915_gem_object,
					       obj_link);
		if (!obj)
			break;

		list_del(&obj->obj_link);
		i915_gem_object_unlock(obj);
	}
}
/*
 * Remove @obj from the list it is linked on via obj_link, then unlock it.
 *
 * NOTE(review): presumably @obj must currently sit on a ww context's
 * obj_list (list_del() on an unlinked entry would be invalid) - confirm
 * against callers.
 */
void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
{
list_del(&obj->obj_link);
i915_gem_object_unlock(obj);
}
/*
 * Tear down @ww: unlock every object still on its list, warn if a contended
 * object was left pending (i.e. not consumed by i915_gem_ww_ctx_backoff()),
 * then finish the acquire context.
 */
void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
{
i915_gem_ww_ctx_unlock_all(ww);
WARN_ON(ww->contended);
ww_acquire_fini(&ww->ctx);
}
/*
 * Back off after lock contention: drop every lock held through @ww, then
 * take the contended object's reservation lock via the slow path.
 *
 * On success the contended object is appended to ww->obj_list. In all cases
 * past the initial check, ww->contended is cleared before returning.
 *
 * Returns 0 on success, -EINVAL (with a warning) if there is no contended
 * object, or the error from dma_resv_lock_slow_interruptible() when the
 * context was initialised with intr set.
 */
int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
{
	struct drm_i915_gem_object *obj;
	int ret;

	if (WARN_ON(!ww->contended))
		return -EINVAL;

	obj = ww->contended;
	i915_gem_ww_ctx_unlock_all(ww);

	if (ww->intr) {
		ret = dma_resv_lock_slow_interruptible(obj->base.resv, &ww->ctx);
	} else {
		/* The uninterruptible slow path has no result to check */
		dma_resv_lock_slow(obj->base.resv, &ww->ctx);
		ret = 0;
	}

	if (ret == 0)
		list_add_tail(&obj->obj_link, &ww->obj_list);

	ww->contended = NULL;

	return ret;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"

View file

@ -116,4 +116,16 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
return test_bit(TASKLET_STATE_SCHED, &t->state);
}
/*
 * State for locking several GEM objects under one ww acquire context;
 * set up by i915_gem_ww_ctx_init() and torn down by i915_gem_ww_ctx_fini().
 */
struct i915_gem_ww_ctx {
/* Acquire context handed to the ww_acquire and dma_resv slow-lock calls */
struct ww_acquire_ctx ctx;
/* Objects (linked via obj_link) that are unlocked on backoff and fini */
struct list_head obj_list;
/* If true, i915_gem_ww_ctx_backoff() uses the interruptible slow lock */
bool intr;
/* Object to lock via the slow path on backoff; NULL when none is pending */
struct drm_i915_gem_object *contended;
};
void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
#endif /* __I915_GEM_H__ */

View file

@ -41,6 +41,7 @@
#include "display/intel_lpe_audio.h"
#include "display/intel_psr.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm_irq.h"

View file

@ -1195,24 +1195,39 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx = stream->ctx;
struct intel_context *ce;
int err;
struct i915_gem_ww_ctx ww;
int err = -ENODEV;
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
if (ce->engine != stream->engine) /* first match! */
continue;
/*
* As the ID is the gtt offset of the context's vma we
* pin the vma to ensure the ID remains fixed.
*/
err = intel_context_pin(ce);
if (err == 0) {
stream->pinned_ctx = ce;
break;
}
err = 0;
break;
}
i915_gem_context_unlock_engines(ctx);
if (err)
return ERR_PTR(err);
i915_gem_ww_ctx_init(&ww, true);
retry:
/*
* As the ID is the gtt offset of the context's vma we
* pin the vma to ensure the ID remains fixed.
*/
err = intel_context_pin_ww(ce, &ww);
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
if (err)
return ERR_PTR(err);
stream->pinned_ctx = ce;
return stream->pinned_ctx;
}
@ -1923,15 +1938,22 @@ emit_oa_config(struct i915_perf_stream *stream,
{
struct i915_request *rq;
struct i915_vma *vma;
struct i915_gem_ww_ctx ww;
int err;
vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
return PTR_ERR(vma);
err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
i915_gem_ww_ctx_init(&ww, true);
retry:
err = i915_gem_object_lock(vma->obj, &ww);
if (err)
goto err_vma_put;
goto err;
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err;
intel_engine_pm_get(ce->engine);
rq = i915_request_create(ce);
@ -1953,11 +1975,9 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_add_request;
}
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
i915_vma_unlock(vma);
if (err)
goto err_add_request;
@ -1971,7 +1991,14 @@ err_add_request:
i915_request_add(rq);
err_vma_unpin:
i915_vma_unpin(vma);
err_vma_put:
err:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
i915_vma_put(vma);
return err;
}

View file

@ -31,6 +31,7 @@
#include <linux/sched/signal.h>
#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"
@ -186,48 +187,34 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
irq_execute_cb(wrk);
}
static void __notify_execute_cb(struct i915_request *rq)
static __always_inline void
__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
{
struct execute_cb *cb, *cn;
lockdep_assert_held(&rq->lock);
GEM_BUG_ON(!i915_request_is_active(rq));
if (llist_empty(&rq->execute_cb))
return;
llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
irq_work_queue(&cb->work);
/*
* XXX Rollback on __i915_request_unsubmit()
*
* In the future, perhaps when we have an active time-slicing scheduler,
* it will be interesting to unsubmit parallel execution and remove
* busywaits from the GPU until their master is restarted. This is
* quite hairy, we have to carefully rollback the fence and do a
* preempt-to-idle cycle on the target engine, all the while the
* master execute_cb may refire.
*/
init_llist_head(&rq->execute_cb);
llist_for_each_entry_safe(cb, cn,
llist_del_all(&rq->execute_cb),
work.llnode)
fn(&cb->work);
}
static inline void
remove_from_client(struct i915_request *request)
static void __notify_execute_cb_irq(struct i915_request *rq)
{
struct drm_i915_file_private *file_priv;
__notify_execute_cb(rq, irq_work_queue);
}
if (!READ_ONCE(request->file_priv))
return;
static bool irq_work_imm(struct irq_work *wrk)
{
wrk->func(wrk);
return false;
}
rcu_read_lock();
file_priv = xchg(&request->file_priv, NULL);
if (file_priv) {
spin_lock(&file_priv->mm.lock);
list_del(&request->client_link);
spin_unlock(&file_priv->mm.lock);
}
rcu_read_unlock();
static void __notify_execute_cb_imm(struct i915_request *rq)
{
__notify_execute_cb(rq, irq_work_imm);
}
static void free_capture_list(struct i915_request *request)
@ -274,9 +261,16 @@ static void remove_from_engine(struct i915_request *rq)
locked = engine;
}
list_del_init(&rq->sched.link);
clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
/* Prevent further __await_execution() registering a cb, then flush */
set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
spin_unlock_irq(&locked->active.lock);
__notify_execute_cb_imm(rq);
}
bool i915_request_retire(struct i915_request *rq)
@ -288,6 +282,7 @@ bool i915_request_retire(struct i915_request *rq)
GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
trace_i915_request_retire(rq);
i915_request_mark_complete(rq);
/*
* We know the GPU must have read the request to have
@ -305,32 +300,30 @@ bool i915_request_retire(struct i915_request *rq)
__i915_request_fill(rq, POISON_FREE);
rq->ring->head = rq->postfix;
if (!i915_request_signaled(rq)) {
spin_lock_irq(&rq->lock);
dma_fence_signal_locked(&rq->fence);
spin_unlock_irq(&rq->lock);
}
if (i915_request_has_waitboost(rq)) {
GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
atomic_dec(&rq->engine->gt->rps.num_waiters);
}
/*
* We only loosely track inflight requests across preemption,
* and so we may find ourselves attempting to retire a _completed_
* request that we have removed from the HW and put back on a run
* queue.
*
* As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
* after removing the breadcrumb and signaling it, so that we do not
* inadvertently attach the breadcrumb to a completed request.
*/
remove_from_engine(rq);
spin_lock_irq(&rq->lock);
i915_request_mark_complete(rq);
if (!i915_request_signaled(rq))
dma_fence_signal_locked(&rq->fence);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
i915_request_cancel_breadcrumb(rq);
if (i915_request_has_waitboost(rq)) {
GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
atomic_dec(&rq->engine->gt->rps.num_waiters);
}
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
__notify_execute_cb(rq);
}
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
spin_unlock_irq(&rq->lock);
remove_from_client(rq);
__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
intel_context_exit(rq->context);
@ -357,12 +350,6 @@ void i915_request_retire_upto(struct i915_request *rq)
} while (i915_request_retire(tmp) && tmp != rq);
}
static void __llist_add(struct llist_node *node, struct llist_head *head)
{
node->next = head->first;
head->first = node;
}
static struct i915_request * const *
__engine_active(struct intel_engine_cs *engine)
{
@ -388,17 +375,38 @@ static bool __request_in_flight(const struct i915_request *signal)
* As we know that there are always preemption points between
* requests, we know that only the currently executing request
* may be still active even though we have cleared the flag.
* However, we can't rely on our tracking of ELSP[0] to known
* However, we can't rely on our tracking of ELSP[0] to know
* which request is currently active and so may be stuck, as
* the tracking may be an event behind. Instead assume that
* if the context is still inflight, then it is still active
* even if the active flag has been cleared.
*
* To further complicate matters, if there is a pending promotion, the HW
* may either perform a context switch to the second inflight execlists,
* or it may switch to the pending set of execlists. In the case of the
* latter, it may send the ACK and we process the event copying the
* pending[] over top of inflight[], _overwriting_ our *active. Since
* this implies the HW is arbitrating and not stuck in *active, we do
* not worry about complete accuracy, but we do require no read/write
* tearing of the pointer [the read of the pointer must be valid, even
* as the array is being overwritten, for which we require the writes
* to avoid tearing.]
*
* Note that the read of *execlists->active may race with the promotion
* of execlists->pending[] to execlists->inflight[], overwriting
* the value at *execlists->active. This is fine. The promotion implies
* that we received an ACK from the HW, and so the context is not
* stuck -- if we do not see ourselves in *active, the inflight status
* is valid. If instead we see ourselves being copied into *active,
* we are inflight and may signal the callback.
*/
if (!intel_context_inflight(signal->context))
return false;
rcu_read_lock();
for (port = __engine_active(signal->engine); (rq = *port); port++) {
for (port = __engine_active(signal->engine);
(rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
port++) {
if (rq->context == signal->context) {
inflight = i915_seqno_passed(rq->fence.seqno,
signal->fence.seqno);
@ -439,18 +447,24 @@ __await_execution(struct i915_request *rq,
cb->work.func = irq_execute_cb_hook;
}
spin_lock_irq(&signal->lock);
if (i915_request_is_active(signal) || __request_in_flight(signal)) {
if (hook) {
hook(rq, &signal->fence);
i915_request_put(signal);
}
i915_sw_fence_complete(cb->fence);
kmem_cache_free(global.slab_execute_cbs, cb);
} else {
__llist_add(&cb->work.llnode, &signal->execute_cb);
/*
* Register the callback first, then see if the signaler is already
* active. This ensures that if we race with the
* __notify_execute_cb from i915_request_submit() and we are not
* included in that list, we get a second bite of the cherry and
* execute it ourselves. After this point, a future
* i915_request_submit() will notify us.
*
* In i915_request_retire() we set the ACTIVE bit on a completed
* request (then flush the execute_cb). So by registering the
* callback first, then checking the ACTIVE bit, we serialise with
* the completed/retired request.
*/
if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
if (i915_request_is_active(signal) ||
__request_in_flight(signal))
__notify_execute_cb_imm(signal);
}
spin_unlock_irq(&signal->lock);
return 0;
}
@ -566,18 +580,28 @@ xfer:
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
}
/*
* XXX Rollback bonded-execution on __i915_request_unsubmit()?
*
* In the future, perhaps when we have an active time-slicing scheduler,
* it will be interesting to unsubmit parallel execution and remove
* busywaits from the GPU until their master is restarted. This is
* quite hairy, we have to carefully rollback the fence and do a
* preempt-to-idle cycle on the target engine, all the while the
* master execute_cb may refire.
*/
__notify_execute_cb_irq(request);
/* We may be recursing from the signal callback of another i915 fence */
if (!i915_request_signaled(request)) {
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
__notify_execute_cb(request);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&request->fence.flags) &&
!i915_request_enable_breadcrumb(request))
intel_engine_signal_breadcrumbs(engine);
spin_unlock(&request->lock);
GEM_BUG_ON(!llist_empty(&request->execute_cb));
}
return result;
@ -600,27 +624,27 @@ void __i915_request_unsubmit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
/*
* Only unwind in reverse order, required so that the per-context list
* is kept in seqno/ring order.
*/
RQ_TRACE(request, "\n");
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock);
/*
* Only unwind in reverse order, required so that the per-context list
* is kept in seqno/ring order.
* Before we remove this breadcrumb from the signal list, we have
* to ensure that a concurrent dma_fence_enable_signaling() does not
* attach itself. We first mark the request as no longer active and
* make sure that is visible to other cores, and then remove the
* breadcrumb if attached.
*/
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
i915_request_cancel_breadcrumb(request);
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
spin_unlock(&request->lock);
/* We've already spun, don't charge on resubmitting. */
if (request->sched.semaphores && i915_request_started(request))
request->sched.semaphores = 0;
@ -757,7 +781,6 @@ static void __i915_request_ctor(void *arg)
dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);
rq->file_priv = NULL;
rq->capture_list = NULL;
init_llist_head(&rq->execute_cb);
@ -847,7 +870,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
/* No zalloc, everything must be cleared after use */
rq->batch = NULL;
GEM_BUG_ON(rq->file_priv);
GEM_BUG_ON(rq->capture_list);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
@ -1640,7 +1662,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
return this_cpu != cpu;
}
static bool __i915_spin_request(const struct i915_request * const rq, int state)
static bool __i915_spin_request(struct i915_request * const rq, int state)
{
unsigned long timeout_ns;
unsigned int cpu;
@ -1673,7 +1695,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state)
timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
timeout_ns += local_clock_ns(&cpu);
do {
if (i915_request_completed(rq))
if (dma_fence_is_signaled(&rq->fence))
return true;
if (signal_pending_state(state, current))
@ -1697,7 +1719,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct request_wait *wait = container_of(cb, typeof(*wait), cb);
wake_up_process(wait->tsk);
wake_up_process(fetch_and_zero(&wait->tsk));
}
/**
@ -1766,10 +1788,8 @@ long i915_request_wait(struct i915_request *rq,
* duration, which we currently lack.
*/
if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
__i915_spin_request(rq, state)) {
dma_fence_signal(&rq->fence);
__i915_spin_request(rq, state))
goto out;
}
/*
* This client is about to stall waiting for the GPU. In many cases
@ -1790,15 +1810,29 @@ long i915_request_wait(struct i915_request *rq,
if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
goto out;
/*
* Flush the submission tasklet, but only if it may help this request.
*
* We sometimes experience some latency between the HW interrupts and
* tasklet execution (mostly due to ksoftirqd latency, but it can also
* be due to lazy CS events), so let's run the tasklet manually if there
* is a chance it may submit this request. If the request is not ready
* to run, as it is waiting for other fences to be signaled, flushing
* the tasklet is busy work without any advantage for this client.
*
* If the HW is being lazy, this is the last chance before we go to
* sleep to catch any pending events. We will check periodically in
* the heartbeat to flush the submission tasklets as a last resort
* for unhappy HW.
*/
if (i915_request_is_ready(rq))
intel_engine_flush_submission(rq->engine);
for (;;) {
set_current_state(state);
if (i915_request_completed(rq)) {
dma_fence_signal(&rq->fence);
if (dma_fence_is_signaled(&rq->fence))
break;
}
intel_engine_flush_submission(rq->engine);
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
@ -1814,7 +1848,9 @@ long i915_request_wait(struct i915_request *rq,
}
__set_current_state(TASK_RUNNING);
dma_fence_remove_callback(&rq->fence, &wait.cb);
if (READ_ONCE(wait.tsk))
dma_fence_remove_callback(&rq->fence, &wait.cb);
GEM_BUG_ON(!list_empty(&wait.cb.node));
out:
mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);

View file

@ -284,10 +284,6 @@ struct i915_request {
/** timeline->request entry for this request */
struct list_head link;
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_link;
I915_SELFTEST_DECLARE(struct {
struct list_head link;
unsigned long delay;
@ -365,10 +361,6 @@ void i915_request_submit(struct i915_request *request);
void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);
/* Note: part of the intel_breadcrumbs family */
bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);
long i915_request_wait(struct i915_request *rq,
unsigned int flags,
long timeout)

View file

@ -164,9 +164,13 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
do {
list_for_each_entry_safe(pos, next, &x->head, entry) {
pos->func(pos,
TASK_NORMAL, fence->error,
&extra);
int wake_flags;
wake_flags = fence->error;
if (pos->func == autoremove_wake_function)
wake_flags = 0;
pos->func(pos, TASK_NORMAL, wake_flags, &extra);
}
if (list_empty(&extra))

View file

@ -291,6 +291,8 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
struct i915_vma_work {
struct dma_fence_work base;
struct i915_address_space *vm;
struct i915_vm_pt_stash stash;
struct i915_vma *vma;
struct drm_i915_gem_object *pinned;
struct i915_sw_dma_fence_cb cb;
@ -302,13 +304,10 @@ static int __vma_bind(struct dma_fence_work *work)
{
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
struct i915_vma *vma = vw->vma;
int err;
err = vma->ops->bind_vma(vma->vm, vma, vw->cache_level, vw->flags);
if (err)
atomic_or(I915_VMA_ERROR, &vma->flags);
return err;
vma->ops->bind_vma(vw->vm, &vw->stash,
vma, vw->cache_level, vw->flags);
return 0;
}
static void __vma_release(struct dma_fence_work *work)
@ -317,6 +316,9 @@ static void __vma_release(struct dma_fence_work *work)
if (vw->pinned)
__i915_gem_object_unpin_pages(vw->pinned);
i915_vm_free_pt_stash(vw->vm, &vw->stash);
i915_vm_put(vw->vm);
}
static const struct dma_fence_work_ops bind_ops = {
@ -376,7 +378,6 @@ int i915_vma_bind(struct i915_vma *vma,
{
u32 bind_flags;
u32 vma_flags;
int ret;
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(vma->size > vma->node.size);
@ -433,9 +434,7 @@ int i915_vma_bind(struct i915_vma *vma,
work->pinned = vma->obj;
}
} else {
ret = vma->ops->bind_vma(vma->vm, vma, cache_level, bind_flags);
if (ret)
return ret;
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}
atomic_or(bind_flags, &vma->flags);
@ -853,13 +852,19 @@ static void vma_unbind_pages(struct i915_vma *vma)
__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
}
int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u64 size, u64 alignment, u64 flags)
{
struct i915_vma_work *work = NULL;
intel_wakeref_t wakeref = 0;
unsigned int bound;
int err;
#ifdef CONFIG_PROVE_LOCKING
if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex))
WARN_ON(!ww);
#endif
BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
@ -873,17 +878,31 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (err)
return err;
if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
if (flags & vma->vm->bind_async_flags) {
work = i915_vma_work();
if (!work) {
err = -ENOMEM;
goto err_pages;
goto err_rpm;
}
work->vm = i915_vm_get(vma->vm);
/* Allocate enough page directories to cover the PTEs that will be used */
if (vma->vm->allocate_va_range) {
i915_vm_alloc_pt_stash(vma->vm,
&work->stash,
vma->size);
err = i915_vm_pin_pt_stash(vma->vm,
&work->stash);
if (err)
goto err_fence;
}
}
if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
/*
* Differentiate between user/kernel vma inside the aliasing-ppgtt.
*
@ -971,9 +990,9 @@ err_unlock:
err_fence:
if (work)
dma_fence_work_commit_imm(&work->base);
err_rpm:
if (wakeref)
intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
err_pages:
vma_put_pages(vma);
return err;
}
@ -989,7 +1008,8 @@ static void flush_idle_contexts(struct intel_gt *gt)
intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}
int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u32 align, unsigned int flags)
{
struct i915_address_space *vm = vma->vm;
int err;
@ -997,7 +1017,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
do {
err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
if (err != -ENOSPC) {
if (!err) {
err = i915_vma_wait_for_bind(vma);
@ -1167,6 +1187,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
list_del(&vma->obj->userfault_link);
}
/*
 * Helper for __i915_vma_move_to_active(), which calls it to wait for the
 * vma to be bound before the request starts. It forwards @rq and
 * &vma->active to __i915_request_await_exclusive() and returns that
 * function's result unchanged.
 */
static int
__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
{
return __i915_request_await_exclusive(rq, &vma->active);
}
int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
{
int err;
@ -1174,8 +1200,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
GEM_BUG_ON(!i915_vma_is_pinned(vma));
/* Wait for the vma to be bound before we start! */
err = i915_request_await_active(rq, &vma->active,
I915_ACTIVE_AWAIT_EXCL);
err = __i915_request_await_bind(rq, vma);
if (err)
return err;

View file

@ -237,8 +237,17 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
}
int __must_check
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u64 size, u64 alignment, u64 flags);
/*
 * Pin @vma without a ww acquire context.
 *
 * Convenience wrapper that forwards to i915_vma_pin_ww() with ww == NULL,
 * passing @size, @alignment and @flags through unchanged, and returns its
 * result.
 */
static inline int __must_check
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
}
int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u32 align, unsigned int flags);
static inline int i915_vma_pin_count(const struct i915_vma *vma)
{

View file

@ -199,11 +199,52 @@ out:
return err;
}
/*
 * Live selftest for the i915 ww locking context.
 *
 * Creates two internal objects and locks each of them twice under a single
 * i915_gem_ww_ctx, mixing the plain and interruptible lock variants, to
 * exercise the -EALREADY (already locked by this context) handling. On
 * -EDEADLK the context is backed off and the whole sequence is retried.
 *
 * @arg: the struct drm_i915_private under test.
 *
 * Returns 0 on success or a negative error code.
 */
static int igt_gem_ww_ctx(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj, *obj2;
	struct i915_gem_ww_ctx ww;
	int err = 0;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
	/* Check the object we just created, not the first one. */
	if (IS_ERR(obj2)) {
		err = PTR_ERR(obj2);
		goto put1;
	}

	i915_gem_ww_ctx_init(&ww, true);
retry:
	/* Lock the objects, twice for good measure (-EALREADY handling) */
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_lock_interruptible(obj, &ww);
	if (!err)
		err = i915_gem_object_lock_interruptible(obj2, &ww);
	if (!err)
		err = i915_gem_object_lock(obj2, &ww);

	if (err == -EDEADLK) {
		/* Back off, then restart the whole sequence. */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}

	i915_gem_ww_ctx_fini(&ww);
	i915_gem_object_put(obj2);
put1:
	i915_gem_object_put(obj);
	return err;
}
int i915_gem_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_gem_suspend),
SUBTEST(igt_gem_hibernate),
SUBTEST(igt_gem_ww_ctx),
};
if (intel_gt_is_wedged(&i915->gt))

View file

@ -172,35 +172,45 @@ static int igt_ppgtt_alloc(void *arg)
/* Check we can allocate the entire range */
for (size = 4096; size <= limit; size <<= 2) {
err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, size);
struct i915_vm_pt_stash stash = {};
err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size);
if (err)
goto err_ppgtt_cleanup;
err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
if (err) {
if (err == -ENOMEM) {
pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n",
size, ilog2(size));
err = 0; /* virtual space too large! */
}
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
goto err_ppgtt_cleanup;
}
ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, size);
cond_resched();
ppgtt->vm.clear_range(&ppgtt->vm, 0, size);
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
}
/* Check we can incrementally allocate the entire range */
for (last = 0, size = 4096; size <= limit; last = size, size <<= 2) {
err = ppgtt->vm.allocate_va_range(&ppgtt->vm,
last, size - last);
struct i915_vm_pt_stash stash = {};
err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size - last);
if (err)
goto err_ppgtt_cleanup;
err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
if (err) {
if (err == -ENOMEM) {
pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n",
last, size - last, ilog2(size));
err = 0; /* virtual space too large! */
}
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
goto err_ppgtt_cleanup;
}
ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash,
last, size - last);
cond_resched();
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
}
err_ppgtt_cleanup:
@ -284,9 +294,23 @@ static int lowlevel_hole(struct i915_address_space *vm,
break;
}
if (vm->allocate_va_range &&
vm->allocate_va_range(vm, addr, BIT_ULL(size)))
break;
if (vm->allocate_va_range) {
struct i915_vm_pt_stash stash = {};
if (i915_vm_alloc_pt_stash(vm, &stash,
BIT_ULL(size)))
break;
if (i915_vm_pin_pt_stash(vm, &stash)) {
i915_vm_free_pt_stash(vm, &stash);
break;
}
vm->allocate_va_range(vm, &stash,
addr, BIT_ULL(size));
i915_vm_free_pt_stash(vm, &stash);
}
mock_vma->pages = obj->mm.pages;
mock_vma->node.size = BIT_ULL(size);
@ -1881,6 +1905,7 @@ static int igt_cs_tlb(void *arg)
continue;
while (!__igt_timeout(end_time, NULL)) {
struct i915_vm_pt_stash stash = {};
struct i915_request *rq;
u64 offset;
@ -1888,10 +1913,6 @@ static int igt_cs_tlb(void *arg)
0, vm->total - PAGE_SIZE,
chunk_size, PAGE_SIZE);
err = vm->allocate_va_range(vm, offset, chunk_size);
if (err)
goto end;
memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
vma = i915_vma_instance(bbe, vm, NULL);
@ -1904,6 +1925,20 @@ static int igt_cs_tlb(void *arg)
if (err)
goto end;
err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size);
if (err)
goto end;
err = i915_vm_pin_pt_stash(vm, &stash);
if (err) {
i915_vm_free_pt_stash(vm, &stash);
goto end;
}
vm->allocate_va_range(vm, &stash, offset, chunk_size);
i915_vm_free_pt_stash(vm, &stash);
/* Prime the TLB with the dummy pages */
for (i = 0; i < count; i++) {
vma->node.start = offset + i * PAGE_SIZE;

View file

@ -307,7 +307,7 @@ static int live_noa_gpr(void *arg)
}
/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
scratch = kmap(ce->vm->scratch[0].base.page);
scratch = kmap(__px_page(ce->vm->scratch[0]));
memset(scratch, POISON_FREE, PAGE_SIZE);
rq = intel_context_create_request(ce);
@ -405,7 +405,7 @@ static int live_noa_gpr(void *arg)
out_rq:
i915_request_put(rq);
out_ce:
kunmap(ce->vm->scratch[0].base.page);
kunmap(__px_page(ce->vm->scratch[0]));
intel_context_put(ce);
out:
stream_destroy(stream);

View file

@ -862,6 +862,8 @@ static int live_all_engines(void *arg)
goto out_free;
}
i915_vma_lock(batch);
idx = 0;
for_each_uabi_engine(engine, i915) {
request[idx] = intel_engine_create_kernel_request(engine);
@ -872,11 +874,9 @@ static int live_all_engines(void *arg)
goto out_request;
}
i915_vma_lock(batch);
err = i915_request_await_object(request[idx], batch->obj, 0);
if (err == 0)
err = i915_vma_move_to_active(batch, request[idx], 0);
i915_vma_unlock(batch);
GEM_BUG_ON(err);
err = engine->emit_bb_start(request[idx],
@ -891,6 +891,8 @@ static int live_all_engines(void *arg)
idx++;
}
i915_vma_unlock(batch);
idx = 0;
for_each_uabi_engine(engine, i915) {
if (i915_request_completed(request[idx])) {
@ -981,12 +983,13 @@ static int live_sequential_engines(void *arg)
goto out_free;
}
i915_vma_lock(batch);
request[idx] = intel_engine_create_kernel_request(engine);
if (IS_ERR(request[idx])) {
err = PTR_ERR(request[idx]);
pr_err("%s: Request allocation failed for %s with err=%d\n",
__func__, engine->name, err);
goto out_request;
goto out_unlock;
}
if (prev) {
@ -996,16 +999,14 @@ static int live_sequential_engines(void *arg)
i915_request_add(request[idx]);
pr_err("%s: Request await failed for %s with err=%d\n",
__func__, engine->name, err);
goto out_request;
goto out_unlock;
}
}
i915_vma_lock(batch);
err = i915_request_await_object(request[idx],
batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, request[idx], 0);
i915_vma_unlock(batch);
GEM_BUG_ON(err);
err = engine->emit_bb_start(request[idx],
@ -1020,6 +1021,11 @@ static int live_sequential_engines(void *arg)
prev = request[idx];
idx++;
out_unlock:
i915_vma_unlock(batch);
if (err)
goto out_request;
}
idx = 0;

View file

@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg)
unsigned int x, y;
int err;
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err)

View file

@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg)
if (err)
goto out_unpin;
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_wc_domain(obj, true);
i915_gem_object_unlock(obj);
if (err)
@ -522,9 +522,9 @@ static int igt_lmem_write_cpu(void *arg)
goto out_unpin;
}
/* We want to throw in a random width/align */
bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32),
sizeof(u32));
/* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));
i = 0;
do {

View file

@ -38,14 +38,14 @@ static void mock_insert_entries(struct i915_address_space *vm,
{
}
static int mock_bind_ppgtt(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
static void mock_bind_ppgtt(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND);
set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma));
return 0;
}
static void mock_unbind_ppgtt(struct i915_address_space *vm,
@ -74,9 +74,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
ppgtt->vm.i915 = i915;
ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
ppgtt->vm.file = ERR_PTR(-ENODEV);
ppgtt->vm.dma = &i915->drm.pdev->dev;
i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
ppgtt->vm.clear_range = mock_clear_range;
ppgtt->vm.insert_page = mock_insert_page;
ppgtt->vm.insert_entries = mock_insert_entries;
@ -90,13 +93,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
return ppgtt;
}
static int mock_bind_ggtt(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
static void mock_bind_ggtt(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
return 0;
}
static void mock_unbind_ggtt(struct i915_address_space *vm,
@ -116,6 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
ggtt->mappable_end = resource_size(&ggtt->gmadr);
ggtt->vm.total = 4096 * PAGE_SIZE;
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
ggtt->vm.clear_range = mock_clear_range;
ggtt->vm.insert_page = mock_insert_page;
ggtt->vm.insert_entries = mock_insert_entries;