diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 375 |
1 files changed, 260 insertions, 115 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73eeb6b1f1cd..20fcac37c85a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include "i915_gemfs.h" #include <linux/dma-fence-array.h> #include <linux/kthread.h> #include <linux/reservation.h> @@ -161,8 +162,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } -static struct sg_table * -i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; drm_dma_handle_t *phys; @@ -170,9 +170,10 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) struct scatterlist *sg; char *vaddr; int i; + int err; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Always aligning to the object size, allows a single allocation * to handle all possible callers, and given typical object sizes, @@ -182,7 +183,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) roundup_pow_of_two(obj->base.size), roundup_pow_of_two(obj->base.size)); if (!phys) - return ERR_PTR(-ENOMEM); + return -ENOMEM; vaddr = phys->vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { @@ -191,7 +192,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) { - st = ERR_CAST(page); + err = PTR_ERR(page); goto err_phys; } @@ -208,13 +209,13 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) { - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } if (sg_alloc_table(st, 1, GFP_KERNEL)) { kfree(st); - st = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto err_phys; } @@ -226,11 +227,15 @@ i915_gem_object_get_pages_phys(struct 
drm_i915_gem_object *obj) sg_dma_len(sg) = obj->base.size; obj->phys_handle = phys; - return st; + + __i915_gem_object_set_pages(obj, st, sg->length); + + return 0; err_phys: drm_pci_free(obj->base.dev, phys); - return st; + + return err; } static void __start_cpu_write(struct drm_i915_gem_object *obj) @@ -353,7 +358,7 @@ static long i915_gem_object_wait_fence(struct dma_fence *fence, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { struct drm_i915_gem_request *rq; @@ -386,11 +391,11 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (rps) { + if (rps_client) { if (INTEL_GEN(rq->i915) >= 6) - gen6_rps_boost(rq, rps); + gen6_rps_boost(rq, rps_client); else - rps = NULL; + rps_client = NULL; } timeout = i915_wait_request(rq, flags, timeout); @@ -406,7 +411,7 @@ static long i915_gem_object_wait_reservation(struct reservation_object *resv, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; @@ -425,7 +430,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (i = 0; i < count; i++) { timeout = i915_gem_object_wait_fence(shared[i], flags, timeout, - rps); + rps_client); if (timeout < 0) break; @@ -442,7 +447,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, } if (excl && timeout >= 0) { - timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + timeout = i915_gem_object_wait_fence(excl, flags, timeout, + rps_client); prune_fences = timeout >= 0; } @@ -538,7 +544,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout, - struct intel_rps_client *rps) + struct intel_rps_client *rps_client) { might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) @@ -550,7 +556,7 @@ 
i915_gem_object_wait(struct drm_i915_gem_object *obj, timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout, - rps); + rps_client); return timeout < 0 ? timeout : 0; } @@ -558,7 +564,7 @@ static struct intel_rps_client *to_rps_client(struct drm_file *file) { struct drm_i915_file_private *fpriv = file->driver_priv; - return &fpriv->rps; + return &fpriv->rps_client; } static int @@ -1050,7 +1056,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1234,7 +1242,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, - PIN_MAPPABLE | PIN_NONBLOCK); + PIN_MAPPABLE | + PIN_NONFAULT | + PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -1905,22 +1915,27 @@ int i915_gem_fault(struct vm_fault *vmf) if (ret) goto err_unpin; - ret = i915_vma_get_fence(vma); + ret = i915_vma_pin_fence(vma); if (ret) goto err_unpin; - /* Mark as being mmapped into userspace for later revocation */ - assert_rpm_wakelock_held(dev_priv); - if (list_empty(&obj->userfault_link)) - list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); - /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); + if (ret) + goto err_fence; + /* Mark as being mmapped into userspace for later revocation */ + assert_rpm_wakelock_held(dev_priv); + if (!i915_vma_set_userfault(vma) && !obj->userfault_count++) + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); + GEM_BUG_ON(!obj->userfault_count); + +err_fence: + 
i915_vma_unpin_fence(vma); err_unpin: __i915_vma_unpin(vma); err_unlock: @@ -1972,6 +1987,25 @@ err: return ret; } +static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + GEM_BUG_ON(!obj->userfault_count); + + obj->userfault_count = 0; + list_del(&obj->userfault_link); + drm_vma_node_unmap(&obj->base.vma_node, + obj->base.dev->anon_inode->i_mapping); + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + + i915_vma_unset_userfault(vma); + } +} + /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question @@ -2002,12 +2036,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) lockdep_assert_held(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - if (list_empty(&obj->userfault_link)) + if (!obj->userfault_count) goto out; - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); + __i915_gem_object_release_mmap(obj); /* Ensure that the CPU's PTE are revoked and there are not outstanding * memory transactions from userspace before we return. The TLB @@ -2035,11 +2067,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) */ list_for_each_entry_safe(obj, on, - &dev_priv->mm.userfault_list, userfault_link) { - list_del_init(&obj->userfault_link); - drm_vma_node_unmap(&obj->base.vma_node, - obj->base.dev->anon_inode->i_mapping); - } + &dev_priv->mm.userfault_list, userfault_link) + __i915_gem_object_release_mmap(obj); /* The fence will be lost when the device powers down. If any were * in use by hardware (i.e. 
they are pinned), we should not be powering @@ -2062,7 +2091,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) if (!reg->vma) continue; - GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link)); + GEM_BUG_ON(i915_vma_has_userfault(reg->vma)); reg->dirty = true; } } @@ -2261,6 +2290,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, if (!IS_ERR(pages)) obj->ops->put_pages(obj, pages); + obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + unlock: mutex_unlock(&obj->mm.lock); } @@ -2291,8 +2322,7 @@ static bool i915_sg_trim(struct sg_table *orig_st) return true; } -static struct sg_table * -i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) +static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); const unsigned long page_count = obj->base.size / PAGE_SIZE; @@ -2304,6 +2334,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); + unsigned int sg_page_sizes; gfp_t noreclaim; int ret; @@ -2316,12 +2347,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return ERR_PTR(-ENOMEM); + return -ENOMEM; rebuild_st: if (sg_alloc_table(st, page_count, GFP_KERNEL)) { kfree(st); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } /* Get the list of pages out of our struct file. 
They'll be pinned @@ -2335,6 +2366,7 @@ rebuild_st: sg = st->sgl; st->nents = 0; + sg_page_sizes = 0; for (i = 0; i < page_count; i++) { const unsigned int shrink[] = { I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE, @@ -2387,8 +2419,10 @@ rebuild_st: if (!i || sg->length >= max_segment || page_to_pfn(page) != last_pfn + 1) { - if (i) + if (i) { + sg_page_sizes |= sg->length; sg = sg_next(sg); + } st->nents++; sg_set_page(sg, page, PAGE_SIZE, 0); } else { @@ -2399,8 +2433,10 @@ rebuild_st: /* Check that the i965g/gm workaround works. */ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL)); } - if (sg) /* loop terminated early; short sg table */ + if (sg) { /* loop terminated early; short sg table */ + sg_page_sizes |= sg->length; sg_mark_end(sg); + } /* Trim unused sg entries to avoid wasting memory. */ i915_sg_trim(st); @@ -2429,7 +2465,9 @@ rebuild_st: if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - return st; + __i915_gem_object_set_pages(obj, st, sg_page_sizes); + + return 0; err_sg: sg_mark_end(sg); @@ -2450,12 +2488,17 @@ err_pages: if (ret == -ENOSPC) ret = -ENOMEM; - return ERR_PTR(ret); + return ret; } void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, - struct sg_table *pages) + struct sg_table *pages, + unsigned int sg_page_sizes) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned long supported = INTEL_INFO(i915)->page_sizes; + int i; + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; @@ -2469,23 +2512,40 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } + + GEM_BUG_ON(!sg_page_sizes); + obj->mm.page_sizes.phys = sg_page_sizes; + + /* + * Calculate the supported page-sizes which fit into the given + * sg_page_sizes. This will give us the page-sizes which we may be able + * to use opportunistically when later inserting into the GTT. 
For + * example if phys=2G, then in theory we should be able to use 1G, 2M, + * 64K or 4K pages, although in practice this will depend on a number of + * other factors. + */ + obj->mm.page_sizes.sg = 0; + for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { + if (obj->mm.page_sizes.phys & ~0u << i) + obj->mm.page_sizes.sg |= BIT(i); + } + + GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg)); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { - struct sg_table *pages; + int err; if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); return -EFAULT; } - pages = obj->ops->get_pages(obj); - if (unlikely(IS_ERR(pages))) - return PTR_ERR(pages); + err = obj->ops->get_pages(obj); + GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages)); - __i915_gem_object_set_pages(obj, pages); - return 0; + return err; } /* Ensure that the associated pages are gathered from the backing storage @@ -2796,7 +2856,17 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request = NULL; - /* Prevent the signaler thread from updating the request + /* + * During the reset sequence, we must prevent the engine from + * entering RC6. As the context state is undefined until we restart + * the engine, if it does enter RC6 during the reset, the state + * written to the powercontext is undefined and so we may lose + * GPU state upon resume, i.e. fail to restart after a reset. + */ + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + /* + * Prevent the signaler thread from updating the request * state (by calling dma_fence_signal) as we are processing * the reset. 
The write from the GPU of the seqno is * asynchronous and the signaler thread may see a different @@ -2807,7 +2877,8 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) */ kthread_park(engine->breadcrumbs.signaler); - /* Prevent request submission to the hardware until we have + /* + * Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request * to a second via its engine->irq_tasklet *just* as we are @@ -2997,6 +3068,8 @@ void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) { tasklet_enable(&engine->execlists.irq_tasklet); kthread_unpark(engine->breadcrumbs.signaler); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) @@ -3016,49 +3089,76 @@ static void nop_submit_request(struct drm_i915_gem_request *request) { GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); dma_fence_set_error(&request->fence, -EIO); + i915_gem_request_submit(request); - intel_engine_init_global_seqno(request->engine, request->global_seqno); } -static void engine_set_wedged(struct intel_engine_cs *engine) +static void nop_complete_submit_request(struct drm_i915_gem_request *request) { - /* We need to be sure that no thread is running the old callback as - * we install the nop handler (otherwise we would submit a request - * to hardware that will never complete). In order to prevent this - * race, we wait until the machine is idle before making the swap - * (using stop_machine()). 
- */ - engine->submit_request = nop_submit_request; + unsigned long flags; - /* Mark all executing requests as skipped */ - engine->cancel_requests(engine); + GEM_BUG_ON(!i915_terminally_wedged(&request->i915->gpu_error)); + dma_fence_set_error(&request->fence, -EIO); - /* Mark all pending requests as complete so that any concurrent - * (lockless) lookup doesn't try and wait upon the request as we - * reset it. - */ - intel_engine_init_global_seqno(engine, - intel_engine_last_submit(engine)); + spin_lock_irqsave(&request->engine->timeline->lock, flags); + __i915_gem_request_submit(request); + intel_engine_init_global_seqno(request->engine, request->global_seqno); + spin_unlock_irqrestore(&request->engine->timeline->lock, flags); } -static int __i915_gem_set_wedged_BKL(void *data) +void i915_gem_set_wedged(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + /* + * First, stop submission to hw, but do not yet complete requests by + * rolling the global seqno forward (since this would complete requests + * for which we haven't set the fence error to EIO yet). + */ for_each_engine(engine, i915, id) - engine_set_wedged(engine); + engine->submit_request = nop_submit_request; - set_bit(I915_WEDGED, &i915->gpu_error.flags); - wake_up_all(&i915->gpu_error.reset_queue); + /* + * Make sure no one is running the old callback before we proceed with + * cancelling requests and resetting the completion tracking. Otherwise + * we might submit a request to the hardware which never completes. + */ + synchronize_rcu(); - return 0; -} + for_each_engine(engine, i915, id) { + /* Mark all executing requests as skipped */ + engine->cancel_requests(engine); -void i915_gem_set_wedged(struct drm_i915_private *dev_priv) -{ - stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL); + /* + * Only once we've force-cancelled all in-flight requests can we + * start to complete all requests. 
+ */ + engine->submit_request = nop_complete_submit_request; + } + + /* + * Make sure no request can slip through without getting completed by + * either this call here to intel_engine_init_global_seqno, or the one + * in nop_complete_submit_request. + */ + synchronize_rcu(); + + for_each_engine(engine, i915, id) { + unsigned long flags; + + /* Mark all pending requests as complete so that any concurrent + * (lockless) lookup doesn't try and wait upon the request as we + * reset it. + */ + spin_lock_irqsave(&engine->timeline->lock, flags); + intel_engine_init_global_seqno(engine, + intel_engine_last_submit(engine)); + spin_unlock_irqrestore(&engine->timeline->lock, flags); + } + + set_bit(I915_WEDGED, &i915->gpu_error.flags); + wake_up_all(&i915->gpu_error.reset_queue); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) @@ -3959,42 +4059,47 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); + if (!view && flags & PIN_MAPPABLE) { + /* If the required space is larger than the available + * aperture, we will not able to find a slot for the + * object and unbinding the object now will be in + * vain. Worse, doing so may cause us to ping-pong + * the object in and out of the Global GTT and + * waste a lot of cycles under the mutex. + */ + if (obj->base.size > dev_priv->ggtt.mappable_end) + return ERR_PTR(-E2BIG); + + /* If NONBLOCK is set the caller is optimistically + * trying to cache the full object within the mappable + * aperture, and *must* have a fallback in place for + * situations where we cannot bind the object. We + * can be a little more lax here and use the fallback + * more often to avoid costly migrations of ourselves + * and other objects within the aperture. + * + * Half-the-aperture is used as a simple heuristic. + * More interesting would to do search for a free + * block prior to making the commitment to unbind. 
+ * That caters for the self-harm case, and with a + * little more heuristics (e.g. NOFAULT, NOEVICT) + * we could try to minimise harm to others. + */ + if (flags & PIN_NONBLOCK && + obj->base.size > dev_priv->ggtt.mappable_end / 2) + return ERR_PTR(-ENOSPC); + } + vma = i915_vma_instance(obj, vm, view); if (unlikely(IS_ERR(vma))) return vma; if (i915_vma_misplaced(vma, size, alignment, flags)) { - if (flags & PIN_NONBLOCK && - (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) - return ERR_PTR(-ENOSPC); + if (flags & PIN_NONBLOCK) { + if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)) + return ERR_PTR(-ENOSPC); - if (flags & PIN_MAPPABLE) { - /* If the required space is larger than the available - * aperture, we will not able to find a slot for the - * object and unbinding the object now will be in - * vain. Worse, doing so may cause us to ping-pong - * the object in and out of the Global GTT and - * waste a lot of cycles under the mutex. - */ - if (vma->fence_size > dev_priv->ggtt.mappable_end) - return ERR_PTR(-E2BIG); - - /* If NONBLOCK is set the caller is optimistically - * trying to cache the full object within the mappable - * aperture, and *must* have a fallback in place for - * situations where we cannot bind the object. We - * can be a little more lax here and use the fallback - * more often to avoid costly migrations of ourselves - * and other objects within the aperture. - * - * Half-the-aperture is used as a simple heuristic. - * More interesting would to do search for a free - * block prior to making the commitment to unbind. - * That caters for the self-harm case, and with a - * little more heuristics (e.g. NOFAULT, NOEVICT) - * we could try to minimise harm to others. 
- */ - if (flags & PIN_NONBLOCK && + if (flags & PIN_MAPPABLE && vma->fence_size > dev_priv->ggtt.mappable_end / 2) return ERR_PTR(-ENOSPC); } @@ -4221,7 +4326,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, mutex_init(&obj->mm.lock); INIT_LIST_HEAD(&obj->global_link); - INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4251,6 +4355,30 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .pwrite = i915_gem_object_pwrite_gtt, }; +static int i915_gem_object_create_shmem(struct drm_device *dev, + struct drm_gem_object *obj, + size_t size) +{ + struct drm_i915_private *i915 = to_i915(dev); + unsigned long flags = VM_NORESERVE; + struct file *filp; + + drm_gem_private_object_init(dev, obj, size); + + if (i915->mm.gemfs) + filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size, + flags); + else + filp = shmem_file_setup("i915", size, flags); + + if (IS_ERR(filp)) + return PTR_ERR(filp); + + obj->filp = filp; + + return 0; +} + struct drm_i915_gem_object * i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) { @@ -4275,7 +4403,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) if (obj == NULL) return ERR_PTR(-ENOMEM); - ret = drm_gem_object_init(&dev_priv->drm, &obj->base, size); + ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size); if (ret) goto fail; @@ -4378,6 +4506,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, llist_for_each_entry_safe(obj, on, freed, freed) { GEM_BUG_ON(obj->bind_count); + GEM_BUG_ON(obj->userfault_count); GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); GEM_BUG_ON(!list_empty(&obj->lut_list)); @@ -4547,8 +4676,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) /* As the idle_work is rearming if it detects a race, play safe and * repeat the flush until it is definitely idle. 
*/ - while (flush_delayed_work(&dev_priv->gt.idle_work)) - ; + drain_delayed_work(&dev_priv->gt.idle_work); /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. @@ -4595,6 +4723,7 @@ void i915_gem_resume(struct drm_i915_private *dev_priv) mutex_lock(&dev->struct_mutex); i915_gem_restore_gtt_mappings(dev_priv); + i915_gem_restore_fences(dev_priv); /* As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset @@ -4757,6 +4886,15 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + /* + * We need to fallback to 4K pages since gvt gtt handling doesn't + * support huge page entries - we will need to check either hypervisor + * mm can support huge guest page or just do emulation in gvt. + */ + if (intel_vgpu_active(dev_priv)) + mkwrite_device_info(dev_priv)->page_sizes = + I915_GTT_PAGE_SIZE_4K; + dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); if (!i915_modparams.enable_execlists) { @@ -4914,6 +5052,10 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) spin_lock_init(&dev_priv->fb_tracking.lock); + err = i915_gemfs_init(dev_priv); + if (err) + DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err); + return 0; err_priorities: @@ -4952,6 +5094,8 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ rcu_barrier(); + + i915_gemfs_fini(dev_priv); } int i915_gem_freeze(struct drm_i915_private *dev_priv) @@ -5341,6 +5485,7 @@ err_unlock: #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" +#include "selftests/huge_pages.c" #include "selftests/i915_gem_object.c" #include "selftests/i915_gem_coherency.c" #endif |