author | Dave Airlie <airlied@redhat.com> | 2015-10-30 09:45:33 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2015-10-30 09:45:33 +1000 |
commit | 974e59ba0bc3bcc94d11fce9b34c10af32b5930f (patch) | |
tree | 51a970b2035018591ce0669aafc065ff20a80ee7 /drivers/gpu/drm/i915/i915_gem.c | |
parent | d7e1bc3f5e70c5a106606e33cfa4d413459611ba (diff) | |
parent | 606bb5e0b28b540685fb94c22902cd9a948a3779 (diff) | |
Merge tag 'drm-intel-next-fixes-2015-10-22' of git://anongit.freedesktop.org/drm-intel into drm-next
Bunch of -fixes for 4.4. Well, not just fixes: I've also left the mmio/register work
from Ville in here, since it's low-risk despite the churn all over.
* tag 'drm-intel-next-fixes-2015-10-22' of git://anongit.freedesktop.org/drm-intel: (23 commits)
drm/i915: Use round to closest when computing the CEA 1.001 pixel clocks
drm/i915: Kill the leftover RMW from ivb_sprite_disable()
drm/i915: restore ggtt double-bind avoidance
drm/i915/skl: Enable pipe gamma for sprite planes.
drm/i915/skl+: Enable pipe CSC on cursor planes. (v2)
MAINTAINERS: add link to the Intel Graphics for Linux web site
drm/i915: Move skl/bxt gt specific workarounds to ring init
drm/i915: Drop i915_gem_obj_is_pinned() from set-cache-level
drm/i915: revert a few more watermark commits
drm/i915: Remove dev_priv argument from NEEDS_FORCE_WAKE
drm/i915: Clean up LVDS register handling
drm/i915: Throw out some useless variables
drm/i915: Parametrize and fix SWF registers
drm/i915: s/PIPE_FRMCOUNT_GM45/PIPE_FRMCOUNT_G4X/ etc.
drm/i915: Turn GEN5_ASSERT_IIR_IS_ZERO() into a function
drm/i915: Fix a few bad hex numbers in register defines
drm/i915: Protect register macro arguments
drm/i915: Include gpio_mmio_base in GMBUS reg defines
drm/i915: Parametrize HSW video DIP data registers
drm/i915: Eliminate weird parameter inversion from BXT PPS registers
...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 99 |
1 file changed, 78 insertions, 21 deletions
```diff
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1e67484fd5dc..e57061ac0219 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3657,53 +3657,106 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	return 0;
 }
 
+/**
+ * Changes the cache-level of an object across all VMA.
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is, if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				    enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct i915_vma *vma, *next;
+	bool bound = false;
 	int ret = 0;
 
 	if (obj->cache_level == cache_level)
 		goto out;
 
-	if (i915_gem_obj_is_pinned(obj)) {
-		DRM_DEBUG("can not change the cache level of pinned objects\n");
-		return -EBUSY;
-	}
-
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (vma->pin_count) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
 		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
 			ret = i915_vma_unbind(vma);
 			if (ret)
 				return ret;
-		}
+		} else
+			bound = true;
 	}
 
-	if (i915_gem_obj_bound_any(obj)) {
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However since
+	 * we already have a valid slot, dma mapping, pages etc, we may as
+	 * well rewrite the PTE in the belief that doing so tramples upon
+	 * less state and so involves less work.
+	 */
+	if (bound) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
 		ret = i915_gem_object_wait_rendering(obj, false);
 		if (ret)
 			return ret;
 
-		i915_gem_object_finish_gtt(obj);
-
-		/* Before SandyBridge, you could not use tiling or fence
-		 * registers with snooped memory, so relinquish any fences
-		 * currently pointing to our region in the aperture.
-		 */
-		if (INTEL_INFO(dev)->gen < 6) {
+		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmapping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
 			ret = i915_gem_object_put_fence(obj);
 			if (ret)
 				return ret;
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
 		}
 
-		list_for_each_entry(vma, &obj->vma_list, vma_link)
-			if (drm_mm_node_allocated(&vma->node)) {
-				ret = i915_vma_bind(vma, cache_level,
-						    PIN_UPDATE);
-				if (ret)
-					return ret;
-			}
+		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
 	}
 
 	list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3711,6 +3764,10 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	obj->cache_level = cache_level;
 
 out:
+	/* Flush the dirty CPU caches to the backing storage so that the
+	 * object is now coherent at its new cache level (with respect
+	 * to the access domain).
+	 */
 	if (obj->cache_dirty &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
 	    cpu_write_needs_clflush(obj)) {
```
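For readers skimming the patch, the core change is the two-pass walk over the object's VMA list: pass one rejects pinned VMAs and unbinds placements that would be invalid at the new cache level, pass two rewrites the PTEs of whatever stayed bound in place rather than unbinding and rebinding everything. The following is a minimal, standalone C sketch of that control flow only; every type and helper in it (struct vma, valid_gtt_space, and so on) is a hypothetical stand-in, not the real i915 API, and the fence/mmap/wait handling from the patch is deliberately omitted.

```c
/* Standalone model of the set-cache-level flow from this diff.
 * Not kernel code: all names here are illustrative stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

enum cache_level { CACHE_NONE, CACHE_LLC, CACHE_WT };

struct vma {
	bool allocated;          /* node bound into the GTT (drm_mm_node_allocated) */
	int pin_count;           /* pinned VMAs must not be touched */
	enum cache_level level;  /* cache level currently written in the PTEs */
};

/* Stand-in for i915_gem_valid_gtt_space(): some placements are
 * invalid at certain cache levels; the rule here is arbitrary. */
static bool valid_gtt_space(const struct vma *vma, enum cache_level level)
{
	(void)vma;
	return level != CACHE_WT;
}

static int set_cache_level(struct vma *vmas, int count, enum cache_level level)
{
	bool bound = false;
	int i;

	/* Pass 1: refuse pinned VMAs, unbind invalid placements,
	 * and remember whether anything stayed bound. */
	for (i = 0; i < count; i++) {
		struct vma *vma = &vmas[i];

		if (!vma->allocated)
			continue;
		if (vma->pin_count)
			return -1; /* -EBUSY in the kernel */
		if (!valid_gtt_space(vma, level))
			vma->allocated = false; /* i915_vma_unbind() */
		else
			bound = true;
	}

	/* Pass 2: rewrite the PTEs of VMAs that stayed bound, reusing
	 * their existing slot instead of a full unbind/rebind cycle. */
	if (bound) {
		for (i = 0; i < count; i++) {
			if (!vmas[i].allocated)
				continue;
			vmas[i].level = level; /* i915_vma_bind(..., PIN_UPDATE) */
		}
	}
	return 0;
}

int main(void)
{
	struct vma vmas[2] = {
		{ .allocated = true,  .pin_count = 0, .level = CACHE_NONE },
		{ .allocated = false, .pin_count = 0, .level = CACHE_NONE },
	};

	if (set_cache_level(vmas, 2, CACHE_LLC) == 0)
		printf("vma0 now at cache level %d\n", vmas[0].level);
	return 0;
}
```

The real patch inserts extra work between the two passes (waiting for rendering, and on non-LLC hardware dropping the CPU mmap and fence before touching the PTEs), which the sketch leaves out; the point is only the bound-tracking shape that lets the pin check move from the whole object down to individual VMAs.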