aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_gtt.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_gtt.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c2676
1 files changed, 1289 insertions, 1387 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d646d37eec2f..6239a9adbf14 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -32,18 +32,27 @@
#include <linux/stop_machine.h>
#include <asm/set_memory.h>
+#include <asm/smp.h>
#include <drm/i915_drm.h>
+#include "display/intel_frontbuffer.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
+
#include "i915_drv.h"
-#include "i915_vgpu.h"
-#include "i915_reset.h"
+#include "i915_scatterlist.h"
#include "i915_trace.h"
-#include "intel_drv.h"
-#include "intel_frontbuffer.h"
+#include "i915_vgpu.h"
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
/**
* DOC: Global GTT views
*
@@ -105,46 +114,55 @@
*
*/
+#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
+
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);
-static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
+static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
/*
* Note that as an uncached mmio write, this will flush the
* WCB of the writes into the GGTT before it triggers the invalidate.
*/
- I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+ intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}
-static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
+static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
- gen6_ggtt_invalidate(dev_priv);
- I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-}
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ struct drm_i915_private *i915 = ggtt->vm.i915;
-static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
-{
- intel_gtt_chipset_flush();
+ gen6_ggtt_invalidate(ggtt);
+
+ if (INTEL_GEN(i915) >= 12)
+ intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+ else
+ intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
-static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
- i915->ggtt.invalidate(i915);
+ intel_gtt_chipset_flush();
}
static int ppgtt_bind_vma(struct i915_vma *vma,
enum i915_cache_level cache_level,
- u32 unused)
+ u32 flags)
{
u32 pte_flags;
int err;
- if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
+ if (flags & I915_VMA_ALLOC) {
err = vma->vm->allocate_va_range(vma->vm,
vma->node.start, vma->size);
if (err)
return err;
+
+ set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
}
/* Applicable to VLV, and gen8+ */
@@ -152,14 +170,17 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
if (i915_gem_object_is_readonly(vma->obj))
pte_flags |= PTE_READ_ONLY;
+ GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+ wmb();
return 0;
}
static void ppgtt_unbind_vma(struct i915_vma *vma)
{
- vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+ if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+ vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}
static int ppgtt_set_pages(struct i915_vma *vma)
@@ -210,10 +231,10 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
}
-static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
- const enum i915_cache_level level)
+static u64 gen8_pde_encode(const dma_addr_t addr,
+ const enum i915_cache_level level)
{
- gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
+ u64 pde = _PAGE_PRESENT | _PAGE_RW;
pde |= addr;
if (level != I915_CACHE_NONE)
pde |= PPAT_CACHED_PDE;
@@ -222,9 +243,6 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
return pde;
}
-#define gen8_pdpe_encode gen8_pde_encode
-#define gen8_pml4e_encode gen8_pde_encode
-
static u64 snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
u32 flags)
@@ -341,11 +359,11 @@ static struct page *stash_pop_page(struct pagestash *stash)
static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
{
- int nr;
+ unsigned int nr;
spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
- nr = min_t(int, pvec->nr, pagevec_space(&stash->pvec));
+ nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
memcpy(stash->pvec.pages + stash->pvec.nr,
pvec->pages + pvec->nr - nr,
sizeof(pvec->pages[0]) * nr);
@@ -399,7 +417,8 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
page = stack.pages[--stack.nr];
/* Merge spare WC pages to the global stash */
- stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
+ if (stack.nr)
+ stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
/* Push any surplus WC pages onto the local VM stash */
if (stack.nr)
@@ -469,13 +488,76 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
*/
might_sleep();
spin_lock(&vm->free_pages.lock);
- if (!pagevec_add(&vm->free_pages.pvec, page))
+ while (!pagevec_space(&vm->free_pages.pvec))
vm_free_pages_release(vm, false);
+ GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
+ pagevec_add(&vm->free_pages.pvec, page);
+ spin_unlock(&vm->free_pages.lock);
+}
+
+static void i915_address_space_fini(struct i915_address_space *vm)
+{
+ spin_lock(&vm->free_pages.lock);
+ if (pagevec_count(&vm->free_pages.pvec))
+ vm_free_pages_release(vm, true);
+ GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
spin_unlock(&vm->free_pages.lock);
+
+ drm_mm_takedown(&vm->mm);
+
+ mutex_destroy(&vm->mutex);
+}
+
+void __i915_vm_close(struct i915_address_space *vm)
+{
+ struct i915_vma *vma, *vn;
+
+ mutex_lock(&vm->mutex);
+ list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ /* Keep the obj (and hence the vma) alive as _we_ destroy it */
+ if (!kref_get_unless_zero(&obj->base.refcount))
+ continue;
+
+ atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+ WARN_ON(__i915_vma_unbind(vma));
+ i915_vma_destroy(vma);
+
+ i915_gem_object_put(obj);
+ }
+ GEM_BUG_ON(!list_empty(&vm->bound_list));
+ mutex_unlock(&vm->mutex);
+}
+
+static void __i915_vm_release(struct work_struct *work)
+{
+ struct i915_address_space *vm =
+ container_of(work, struct i915_address_space, rcu.work);
+
+ vm->cleanup(vm);
+ i915_address_space_fini(vm);
+
+ kfree(vm);
+}
+
+void i915_vm_release(struct kref *kref)
+{
+ struct i915_address_space *vm =
+ container_of(kref, struct i915_address_space, ref);
+
+ GEM_BUG_ON(i915_is_ggtt(vm));
+ trace_i915_ppgtt_release(vm);
+
+ queue_rcu_work(vm->i915->wq, &vm->rcu);
}
static void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
+ kref_init(&vm->ref);
+ INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+ atomic_set(&vm->open, 1);
+
/*
* The vm->mutex must be reclaim safe (for use in the shrinker).
* Do a dummy acquire now under fs_reclaim so that any allocation
@@ -491,23 +573,9 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
stash_init(&vm->free_pages);
- INIT_LIST_HEAD(&vm->unbound_list);
INIT_LIST_HEAD(&vm->bound_list);
}
-static void i915_address_space_fini(struct i915_address_space *vm)
-{
- spin_lock(&vm->free_pages.lock);
- if (pagevec_count(&vm->free_pages.pvec))
- vm_free_pages_release(vm, true);
- GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
- spin_unlock(&vm->free_pages.lock);
-
- drm_mm_takedown(&vm->mm);
-
- mutex_destroy(&vm->mutex);
-}
-
static int __setup_page_dma(struct i915_address_space *vm,
struct i915_page_dma *p,
gfp_t gfp)
@@ -544,28 +612,17 @@ static void cleanup_page_dma(struct i915_address_space *vm,
#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
-#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
-#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
-#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
-#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
-
-static void fill_page_dma(struct i915_address_space *vm,
- struct i915_page_dma *p,
- const u64 val)
+static void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
{
- u64 * const vaddr = kmap_atomic(p->page);
-
- memset64(vaddr, val, PAGE_SIZE / sizeof(val));
-
- kunmap_atomic(vaddr);
+ kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
}
-static void fill_page_dma_32(struct i915_address_space *vm,
- struct i915_page_dma *p,
- const u32 v)
-{
- fill_page_dma(vm, p, (u64)v << 32 | v);
-}
+#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
+#define fill32_px(px, v) do { \
+ u64 v__ = lower_32_bits(v); \
+ fill_px((px), v__ << 32 | v__); \
+} while (0)
static int
setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
@@ -584,7 +641,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
* for all.
*/
size = I915_GTT_PAGE_SIZE_4K;
- if (i915_vm_is_48bit(vm) &&
+ if (i915_vm_is_4lvl(vm) &&
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
size = I915_GTT_PAGE_SIZE_64K;
gfp |= __GFP_NOWARN;
@@ -592,7 +649,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
do {
- int order = get_order(size);
+ unsigned int order = get_order(size);
struct page *page;
dma_addr_t addr;
@@ -611,9 +668,9 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
if (unlikely(!IS_ALIGNED(addr, size)))
goto unmap_page;
- vm->scratch_page.page = page;
- vm->scratch_page.daddr = addr;
- vm->scratch_page.order = order;
+ vm->scratch[0].base.page = page;
+ vm->scratch[0].base.daddr = addr;
+ vm->scratch_order = order;
return 0;
unmap_page:
@@ -631,11 +688,28 @@ skip:
static void cleanup_scratch_page(struct i915_address_space *vm)
{
- struct i915_page_dma *p = &vm->scratch_page;
+ struct i915_page_dma *p = px_base(&vm->scratch[0]);
+ unsigned int order = vm->scratch_order;
- dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
+ dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
PCI_DMA_BIDIRECTIONAL);
- __free_pages(p->page, p->order);
+ __free_pages(p->page, order);
+}
+
+static void free_scratch(struct i915_address_space *vm)
+{
+ int i;
+
+ if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
+ return;
+
+ for (i = 1; i <= vm->top; i++) {
+ if (!px_dma(&vm->scratch[i]))
+ break;
+ cleanup_page_dma(vm, px_base(&vm->scratch[i]));
+ }
+
+ cleanup_scratch_page(vm);
}
static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
@@ -646,309 +720,437 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
if (unlikely(!pt))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_px(vm, pt))) {
+ if (unlikely(setup_page_dma(vm, &pt->base))) {
kfree(pt);
return ERR_PTR(-ENOMEM);
}
- pt->used_ptes = 0;
+ atomic_set(&pt->used, 0);
return pt;
}
-static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
+static struct i915_page_directory *__alloc_pd(size_t sz)
{
- cleanup_px(vm, pt);
- kfree(pt);
-}
+ struct i915_page_directory *pd;
-static void gen8_initialize_pt(struct i915_address_space *vm,
- struct i915_page_table *pt)
-{
- fill_px(vm, pt, vm->scratch_pte);
-}
+ pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pd))
+ return NULL;
-static void gen6_initialize_pt(struct i915_address_space *vm,
- struct i915_page_table *pt)
-{
- fill32_px(vm, pt, vm->scratch_pte);
+ spin_lock_init(&pd->lock);
+ return pd;
}
static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;
- pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
+ pd = __alloc_pd(sizeof(*pd));
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
- if (unlikely(setup_px(vm, pd))) {
+ if (unlikely(setup_page_dma(vm, px_base(pd)))) {
kfree(pd);
return ERR_PTR(-ENOMEM);
}
- pd->used_pdes = 0;
return pd;
}
-static void free_pd(struct i915_address_space *vm,
- struct i915_page_directory *pd)
+static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
{
- cleanup_px(vm, pd);
+ cleanup_page_dma(vm, pd);
kfree(pd);
}
-static void gen8_initialize_pd(struct i915_address_space *vm,
- struct i915_page_directory *pd)
-{
- fill_px(vm, pd,
- gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
- memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES);
-}
+#define free_px(vm, px) free_pd(vm, px_base(px))
-static int __pdp_init(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp)
+static inline void
+write_dma_entry(struct i915_page_dma * const pdma,
+ const unsigned short idx,
+ const u64 encoded_entry)
{
- const unsigned int pdpes = i915_pdpes_per_pdp(vm);
+ u64 * const vaddr = kmap_atomic(pdma->page);
- pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
- I915_GFP_ALLOW_FAIL);
- if (unlikely(!pdp->page_directory))
- return -ENOMEM;
+ vaddr[idx] = encoded_entry;
+ kunmap_atomic(vaddr);
+}
- memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
+static inline void
+__set_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_dma * const to,
+ u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
+{
+ /* Each thread pre-pins the pd, and we may have a thread per pde. */
+ GEM_BUG_ON(atomic_read(px_used(pd)) > 2 * ARRAY_SIZE(pd->entry));
- return 0;
+ atomic_inc(px_used(pd));
+ pd->entry[idx] = to;
+ write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
}
-static void __pdp_fini(struct i915_page_directory_pointer *pdp)
+#define set_pd_entry(pd, idx, to) \
+ __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
+
+static inline void
+clear_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ const struct i915_page_scratch * const scratch)
{
- kfree(pdp->page_directory);
- pdp->page_directory = NULL;
+ GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
+
+ write_dma_entry(px_base(pd), idx, scratch->encode);
+ pd->entry[idx] = NULL;
+ atomic_dec(px_used(pd));
}
-static inline bool use_4lvl(const struct i915_address_space *vm)
+static bool
+release_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_table * const pt,
+ const struct i915_page_scratch * const scratch)
{
- return i915_vm_is_48bit(vm);
+ bool free = false;
+
+ if (atomic_add_unless(&pt->used, -1, 1))
+ return false;
+
+ spin_lock(&pd->lock);
+ if (atomic_dec_and_test(&pt->used)) {
+ clear_pd_entry(pd, idx, scratch);
+ free = true;
+ }
+ spin_unlock(&pd->lock);
+
+ return free;
}
-static struct i915_page_directory_pointer *
-alloc_pdp(struct i915_address_space *vm)
+static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
- struct i915_page_directory_pointer *pdp;
- int ret = -ENOMEM;
+ struct drm_i915_private *dev_priv = ppgtt->vm.i915;
+ enum vgt_g2v_type msg;
+ int i;
- GEM_BUG_ON(!use_4lvl(vm));
+ if (create)
+ atomic_inc(px_used(ppgtt->pd)); /* never remove */
+ else
+ atomic_dec(px_used(ppgtt->pd));
- pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
- if (!pdp)
- return ERR_PTR(-ENOMEM);
+ mutex_lock(&dev_priv->vgpu.lock);
- ret = __pdp_init(vm, pdp);
- if (ret)
- goto fail_bitmap;
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
+ const u64 daddr = px_dma(ppgtt->pd);
- ret = setup_px(vm, pdp);
- if (ret)
- goto fail_page_m;
+ I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+ I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+
+ msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
+ } else {
+ for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+ const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+ I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
+ I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
+ }
- return pdp;
+ msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
+ }
-fail_page_m:
- __pdp_fini(pdp);
-fail_bitmap:
- kfree(pdp);
+ /* g2v_notify atomically (via hv trap) consumes the message packet. */
+ I915_WRITE(vgtif_reg(g2v_notify), msg);
- return ERR_PTR(ret);
+ mutex_unlock(&dev_priv->vgpu.lock);
}
-static void free_pdp(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp)
+/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
+#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
+#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
+#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
+#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
+#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
+#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
+#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
+
+static inline unsigned int
+gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
- __pdp_fini(pdp);
+ const int shift = gen8_pd_shift(lvl);
+ const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
- if (!use_4lvl(vm))
- return;
+ GEM_BUG_ON(start >= end);
+ end += ~mask >> gen8_pd_shift(1);
- cleanup_px(vm, pdp);
- kfree(pdp);
+ *idx = i915_pde_index(start, shift);
+ if ((start ^ end) & mask)
+ return GEN8_PDES - *idx;
+ else
+ return i915_pde_index(end, shift) - *idx;
}
-static void gen8_initialize_pdp(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp)
+static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
- gen8_ppgtt_pdpe_t scratch_pdpe;
+ const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
- scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
+ GEM_BUG_ON(start >= end);
+ return (start ^ end) & mask && (start & ~mask) == 0;
+}
- fill_px(vm, pdp, scratch_pdpe);
+static inline unsigned int gen8_pt_count(u64 start, u64 end)
+{
+ GEM_BUG_ON(start >= end);
+ if ((start ^ end) >> gen8_pd_shift(1))
+ return GEN8_PDES - (start & (GEN8_PDES - 1));
+ else
+ return end - start;
}
-static void gen8_initialize_pml4(struct i915_address_space *vm,
- struct i915_pml4 *pml4)
+static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
- fill_px(vm, pml4,
- gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
- memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
+ unsigned int shift = __gen8_pte_shift(vm->top);
+ return (vm->total + (1ull << shift) - 1) >> shift;
}
-/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
- * the page table structures, we mark them dirty so that
- * context switching/execlist queuing code takes extra steps
- * to ensure that tlbs are flushed.
- */
-static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
+static inline struct i915_page_directory *
+gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
- ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask;
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+
+ if (vm->top == 2)
+ return ppgtt->pd;
+ else
+ return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}
-/* Removes entries from a single page table, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries.
- */
-static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
- struct i915_page_table *pt,
- u64 start, u64 length)
+static inline struct i915_page_directory *
+gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
- unsigned int num_entries = gen8_pte_count(start, length);
- unsigned int pte = gen8_pte_index(start);
- unsigned int pte_end = pte + num_entries;
- gen8_pte_t *vaddr;
+ return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
+}
- GEM_BUG_ON(num_entries > pt->used_ptes);
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ int count, int lvl)
+{
+ if (lvl) {
+ void **pde = pd->entry;
- pt->used_ptes -= num_entries;
- if (!pt->used_ptes)
- return true;
+ do {
+ if (!*pde)
+ continue;
- vaddr = kmap_atomic_px(pt);
- while (pte < pte_end)
- vaddr[pte++] = vm->scratch_pte;
- kunmap_atomic(vaddr);
+ __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
+ } while (pde++, --count);
+ }
- return false;
+ free_px(vm, pd);
}
-static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
- struct i915_page_directory *pd,
- struct i915_page_table *pt,
- unsigned int pde)
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
- gen8_pde_t *vaddr;
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- pd->page_table[pde] = pt;
+ if (intel_vgpu_active(vm->i915))
+ gen8_ppgtt_notify_vgt(ppgtt, false);
- vaddr = kmap_atomic_px(pd);
- vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
- kunmap_atomic(vaddr);
+ __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
+ free_scratch(vm);
}
-static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
- struct i915_page_directory *pd,
- u64 start, u64 length)
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+ struct i915_page_directory * const pd,
+ u64 start, const u64 end, int lvl)
{
- struct i915_page_table *pt;
- u32 pde;
+ const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ unsigned int idx, len;
+
+ GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
- gen8_for_each_pde(pt, pd, start, length, pde) {
- GEM_BUG_ON(pt == vm->scratch_pt);
+ len = gen8_pd_range(start, end, lvl--, &idx);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, start, end,
+ idx, len, atomic_read(px_used(pd)));
+ GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
- if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
+ do {
+ struct i915_page_table *pt = pd->entry[idx];
+
+ if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+ gen8_pd_contains(start, end, lvl)) {
+ DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+ __func__, vm, lvl + 1, idx, start, end);
+ clear_pd_entry(pd, idx, scratch);
+ __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+ start += (u64)I915_PDES << gen8_pd_shift(lvl);
continue;
+ }
- gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
- GEM_BUG_ON(!pd->used_pdes);
- pd->used_pdes--;
+ if (lvl) {
+ start = __gen8_ppgtt_clear(vm, as_pd(pt),
+ start, end, lvl);
+ } else {
+ unsigned int count;
+ u64 *vaddr;
- free_pt(vm, pt);
- }
+ count = gen8_pt_count(start, end);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+ __func__, vm, lvl, start, end,
+ gen8_pd_index(start, 0), count,
+ atomic_read(&pt->used));
+ GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
- return !pd->used_pdes;
+ vaddr = kmap_atomic_px(pt);
+ memset64(vaddr + gen8_pd_index(start, 0),
+ vm->scratch[0].encode,
+ count);
+ kunmap_atomic(vaddr);
+
+ atomic_sub(count, &pt->used);
+ start += count;
+ }
+
+ if (release_pd_entry(pd, idx, pt, scratch))
+ free_px(vm, pt);
+ } while (idx++, --len);
+
+ return start;
}
-static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp,
- struct i915_page_directory *pd,
- unsigned int pdpe)
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+ u64 start, u64 length)
{
- gen8_ppgtt_pdpe_t *vaddr;
+ GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(range_overflows(start, length, vm->total));
- pdp->page_directory[pdpe] = pd;
- if (!use_4lvl(vm))
- return;
+ start >>= GEN8_PTE_SHIFT;
+ length >>= GEN8_PTE_SHIFT;
+ GEM_BUG_ON(length == 0);
- vaddr = kmap_atomic_px(pdp);
- vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
- kunmap_atomic(vaddr);
+ __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+ start, start + length, vm->top);
}
-/* Removes entries from a single page dir pointer, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries
- */
-static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp,
- u64 start, u64 length)
+static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+ struct i915_page_directory * const pd,
+ u64 * const start, const u64 end, int lvl)
{
- struct i915_page_directory *pd;
- unsigned int pdpe;
+ const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ struct i915_page_table *alloc = NULL;
+ unsigned int idx, len;
+ int ret = 0;
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- GEM_BUG_ON(pd == vm->scratch_pd);
+ GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
- if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
- continue;
+ len = gen8_pd_range(*start, end, lvl--, &idx);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, *start, end,
+ idx, len, atomic_read(px_used(pd)));
+ GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
- gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
- GEM_BUG_ON(!pdp->used_pdpes);
- pdp->used_pdpes--;
+ spin_lock(&pd->lock);
+ GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
+ do {
+ struct i915_page_table *pt = pd->entry[idx];
+
+ if (!pt) {
+ spin_unlock(&pd->lock);
+
+ DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+ __func__, vm, lvl + 1, idx);
+
+ pt = fetch_and_zero(&alloc);
+ if (lvl) {
+ if (!pt) {
+ pt = &alloc_pd(vm)->pt;
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto out;
+ }
+ }
- free_pd(vm, pd);
- }
+ fill_px(pt, vm->scratch[lvl].encode);
+ } else {
+ if (!pt) {
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto out;
+ }
+ }
- return !pdp->used_pdpes;
-}
+ if (intel_vgpu_active(vm->i915) ||
+ gen8_pt_count(*start, end) < I915_PDES)
+ fill_px(pt, vm->scratch[lvl].encode);
+ }
-static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
-}
+ spin_lock(&pd->lock);
+ if (likely(!pd->entry[idx]))
+ set_pd_entry(pd, idx, pt);
+ else
+ alloc = pt, pt = pd->entry[idx];
+ }
-static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
- struct i915_page_directory_pointer *pdp,
- unsigned int pml4e)
-{
- gen8_ppgtt_pml4e_t *vaddr;
+ if (lvl) {
+ atomic_inc(&pt->used);
+ spin_unlock(&pd->lock);
- pml4->pdps[pml4e] = pdp;
+ ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+ start, end, lvl);
+ if (unlikely(ret)) {
+ if (release_pd_entry(pd, idx, pt, scratch))
+ free_px(vm, pt);
+ goto out;
+ }
- vaddr = kmap_atomic_px(pml4);
- vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
- kunmap_atomic(vaddr);
+ spin_lock(&pd->lock);
+ atomic_dec(&pt->used);
+ GEM_BUG_ON(!atomic_read(&pt->used));
+ } else {
+ unsigned int count = gen8_pt_count(*start, end);
+
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+ __func__, vm, lvl, *start, end,
+ gen8_pd_index(*start, 0), count,
+ atomic_read(&pt->used));
+
+ atomic_add(count, &pt->used);
+ /* All other pdes may be simultaneously removed */
+ GEM_BUG_ON(atomic_read(&pt->used) > 2 * I915_PDES);
+ *start += count;
+ }
+ } while (idx++, --len);
+ spin_unlock(&pd->lock);
+out:
+ if (alloc)
+ free_px(vm, alloc);
+ return ret;
}
-/* Removes entries from a single pml4.
- * This is the top-level structure in 4-level page tables used on gen8+.
- * Empty entries are always scratch pml4e.
- */
-static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
- u64 start, u64 length)
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+ u64 start, u64 length)
{
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct i915_pml4 *pml4 = &ppgtt->pml4;
- struct i915_page_directory_pointer *pdp;
- unsigned int pml4e;
-
- GEM_BUG_ON(!use_4lvl(vm));
+ u64 from;
+ int err;
- gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
- GEM_BUG_ON(pdp == vm->scratch_pdp);
+ GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(range_overflows(start, length, vm->total));
- if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
- continue;
+ start >>= GEN8_PTE_SHIFT;
+ length >>= GEN8_PTE_SHIFT;
+ GEM_BUG_ON(length == 0);
+ from = start;
- gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+ err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+ &start, start + length, vm->top);
+ if (unlikely(err && from != start))
+ __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+ from, start, vm->top);
- free_pdp(vm, pdp);
- }
+ return err;
}
static inline struct sgt_dma {
@@ -960,47 +1162,28 @@ static inline struct sgt_dma {
return (struct sgt_dma) { sg, addr, addr + sg->length };
}
-struct gen8_insert_pte {
- u16 pml4e;
- u16 pdpe;
- u16 pde;
- u16 pte;
-};
-
-static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
-{
- return (struct gen8_insert_pte) {
- gen8_pml4e_index(start),
- gen8_pdpe_index(start),
- gen8_pde_index(start),
- gen8_pte_index(start),
- };
-}
-
-static __always_inline bool
-gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
- struct i915_page_directory_pointer *pdp,
- struct sgt_dma *iter,
- struct gen8_insert_pte *idx,
- enum i915_cache_level cache_level,
- u32 flags)
+static __always_inline u64
+gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
+ struct i915_page_directory *pdp,
+ struct sgt_dma *iter,
+ u64 idx,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
struct i915_page_directory *pd;
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
gen8_pte_t *vaddr;
- bool ret;
- GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
- pd = pdp->page_directory[idx->pdpe];
- vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
+ pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
- vaddr[idx->pte] = pte_encode | iter->dma;
+ vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
iter->sg = __sg_next(iter->sg);
if (!iter->sg) {
- ret = false;
+ idx = 0;
break;
}
@@ -1008,90 +1191,68 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
iter->max = iter->dma + iter->sg->length;
}
- if (++idx->pte == GEN8_PTES) {
- idx->pte = 0;
-
- if (++idx->pde == I915_PDES) {
- idx->pde = 0;
-
+ if (gen8_pd_index(++idx, 0) == 0) {
+ if (gen8_pd_index(idx, 1) == 0) {
/* Limited by sg length for 3lvl */
- if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
- idx->pdpe = 0;
- ret = true;
+ if (gen8_pd_index(idx, 2) == 0)
break;
- }
- GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
- pd = pdp->page_directory[idx->pdpe];
+ pd = pdp->entry[gen8_pd_index(idx, 2)];
}
kunmap_atomic(vaddr);
- vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
}
} while (1);
kunmap_atomic(vaddr);
- return ret;
+ return idx;
}
-static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
- struct i915_vma *vma,
+static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+ struct sgt_dma *iter,
enum i915_cache_level cache_level,
u32 flags)
{
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct sgt_dma iter = sgt_dma(vma);
- struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
-
- gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
- cache_level, flags);
-
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
-}
-
-static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
- struct i915_page_directory_pointer **pdps,
- struct sgt_dma *iter,
- enum i915_cache_level cache_level,
- u32 flags)
-{
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
u64 start = vma->node.start;
dma_addr_t rem = iter->sg->length;
+ GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+
do {
- struct gen8_insert_pte idx = gen8_insert_pte(start);
- struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
- struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
- unsigned int page_size;
- bool maybe_64K = false;
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_address(vma->vm, start);
+ struct i915_page_directory * const pd =
+ i915_pd_entry(pdp, __gen8_pte_index(start, 2));
gen8_pte_t encode = pte_encode;
+ unsigned int maybe_64K = -1;
+ unsigned int page_size;
gen8_pte_t *vaddr;
- u16 index, max;
+ u16 index;
if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
- rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
- index = idx.pde;
- max = I915_PDES;
- page_size = I915_GTT_PAGE_SIZE_2M;
-
+ rem >= I915_GTT_PAGE_SIZE_2M &&
+ !__gen8_pte_index(start, 0)) {
+ index = __gen8_pte_index(start, 1);
encode |= GEN8_PDE_PS_2M;
+ page_size = I915_GTT_PAGE_SIZE_2M;
vaddr = kmap_atomic_px(pd);
} else {
- struct i915_page_table *pt = pd->page_table[idx.pde];
+ struct i915_page_table *pt =
+ i915_pt_entry(pd, __gen8_pte_index(start, 1));
- index = idx.pte;
- max = GEN8_PTES;
+ index = __gen8_pte_index(start, 0);
page_size = I915_GTT_PAGE_SIZE;
if (!index &&
vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
- rem >= (max - index) * I915_GTT_PAGE_SIZE))
- maybe_64K = true;
+ rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+ maybe_64K = __gen8_pte_index(start, 1);
vaddr = kmap_atomic_px(pt);
}
@@ -1112,16 +1273,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
iter->dma = sg_dma_address(iter->sg);
iter->max = iter->dma + rem;
- if (maybe_64K && index < max &&
+ if (maybe_64K != -1 && index < I915_PDES &&
!(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
- rem >= (max - index) * I915_GTT_PAGE_SIZE)))
- maybe_64K = false;
+ rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+ maybe_64K = -1;
if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
break;
}
- } while (rem >= page_size && index < max);
+ } while (rem >= page_size && index < I915_PDES);
kunmap_atomic(vaddr);
@@ -1131,14 +1292,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
* it and have reached the end of the sg table and we have
* enough padding.
*/
- if (maybe_64K &&
- (index == max ||
+ if (maybe_64K != -1 &&
+ (index == I915_PDES ||
(i915_vm_has_scratch_64K(vma->vm) &&
!iter->sg && IS_ALIGNED(vma->node.start +
vma->node.size,
I915_GTT_PAGE_SIZE_2M)))) {
vaddr = kmap_atomic_px(pd);
- vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
+ vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
kunmap_atomic(vaddr);
page_size = I915_GTT_PAGE_SIZE_64K;
@@ -1154,8 +1315,8 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
u16 i;
- encode = vma->vm->scratch_pte;
- vaddr = kmap_atomic_px(pd->page_table[idx.pde]);
+ encode = vma->vm->scratch[0].encode;
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
for (i = 1; i < index; i += 16)
memset64(vaddr + i, encode, 15);
@@ -1168,44 +1329,35 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
} while (iter->sg);
}
-static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
- struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static void gen8_ppgtt_insert(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
struct sgt_dma iter = sgt_dma(vma);
- struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
- gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
- flags);
- } else {
- struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
-
- while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
- &iter, &idx, cache_level,
- flags))
- GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+ gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+ } else {
+ u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
- vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
- }
-}
+ do {
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_index(vm, idx);
-static void gen8_free_page_tables(struct i915_address_space *vm,
- struct i915_page_directory *pd)
-{
- int i;
+ idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
+ cache_level, flags);
+ } while (idx);
- for (i = 0; i < I915_PDES; i++) {
- if (pd->page_table[i] != vm->scratch_pt)
- free_pt(vm, pd->page_table[i]);
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
}
static int gen8_init_scratch(struct i915_address_space *vm)
{
int ret;
+ int i;
/*
* If everybody agrees to not to write into the scratch page,
@@ -1213,17 +1365,16 @@ static int gen8_init_scratch(struct i915_address_space *vm)
*/
if (vm->has_read_only &&
vm->i915->kernel_context &&
- vm->i915->kernel_context->ppgtt) {
+ vm->i915->kernel_context->vm) {
struct i915_address_space *clone =
- &vm->i915->kernel_context->ppgtt->vm;
+ rcu_dereference_protected(vm->i915->kernel_context->vm,
+ true); /* static */
GEM_BUG_ON(!clone->has_read_only);
- vm->scratch_page.order = clone->scratch_page.order;
- vm->scratch_pte = clone->scratch_pte;
- vm->scratch_pt = clone->scratch_pt;
- vm->scratch_pd = clone->scratch_pd;
- vm->scratch_pdp = clone->scratch_pdp;
+ vm->scratch_order = clone->scratch_order;
+ memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
+ px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
return 0;
}
@@ -1231,292 +1382,89 @@ static int gen8_init_scratch(struct i915_address_space *vm)
if (ret)
return ret;
- vm->scratch_pte =
- gen8_pte_encode(vm->scratch_page.daddr,
- I915_CACHE_LLC,
- PTE_READ_ONLY);
-
- vm->scratch_pt = alloc_pt(vm);
- if (IS_ERR(vm->scratch_pt)) {
- ret = PTR_ERR(vm->scratch_pt);
- goto free_scratch_page;
- }
+ vm->scratch[0].encode =
+ gen8_pte_encode(px_dma(&vm->scratch[0]),
+ I915_CACHE_LLC, vm->has_read_only);
- vm->scratch_pd = alloc_pd(vm);
- if (IS_ERR(vm->scratch_pd)) {
- ret = PTR_ERR(vm->scratch_pd);
- goto free_pt;
- }
+ for (i = 1; i <= vm->top; i++) {
+ if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+ goto free_scratch;
- if (use_4lvl(vm)) {
- vm->scratch_pdp = alloc_pdp(vm);
- if (IS_ERR(vm->scratch_pdp)) {
- ret = PTR_ERR(vm->scratch_pdp);
- goto free_pd;
- }
+ fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
+ vm->scratch[i].encode =
+ gen8_pde_encode(px_dma(&vm->scratch[i]),
+ I915_CACHE_LLC);
}
- gen8_initialize_pt(vm, vm->scratch_pt);
- gen8_initialize_pd(vm, vm->scratch_pd);
- if (use_4lvl(vm))
- gen8_initialize_pdp(vm, vm->scratch_pdp);
-
return 0;
-free_pd:
- free_pd(vm, vm->scratch_pd);
-free_pt:
- free_pt(vm, vm->scratch_pt);
-free_scratch_page:
- cleanup_scratch_page(vm);
-
- return ret;
+free_scratch:
+ free_scratch(vm);
+ return -ENOMEM;
}
-static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
+static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
struct i915_address_space *vm = &ppgtt->vm;
- struct drm_i915_private *dev_priv = vm->i915;
- enum vgt_g2v_type msg;
- int i;
-
- if (use_4lvl(vm)) {
- const u64 daddr = px_dma(&ppgtt->pml4);
+ struct i915_page_directory *pd = ppgtt->pd;
+ unsigned int idx;
- I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
- I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+ GEM_BUG_ON(vm->top != 2);
+ GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
- msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
- VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
- } else {
- for (i = 0; i < GEN8_3LVL_PDPES; i++) {
- const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+ for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
+ struct i915_page_directory *pde;
- I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
- I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
- }
+ pde = alloc_pd(vm);
+ if (IS_ERR(pde))
+ return PTR_ERR(pde);
- msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
- VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
+ fill_px(pde, vm->scratch[1].encode);
+ set_pd_entry(pd, idx, pde);
+ atomic_inc(px_used(pde)); /* keep pinned */
}
-
- I915_WRITE(vgtif_reg(g2v_notify), msg);
+ wmb();
return 0;
}
-static void gen8_free_scratch(struct i915_address_space *vm)
-{
- if (!vm->scratch_page.daddr)
- return;
-
- if (use_4lvl(vm))
- free_pdp(vm, vm->scratch_pdp);
- free_pd(vm, vm->scratch_pd);
- free_pt(vm, vm->scratch_pt);
- cleanup_scratch_page(vm);
-}
-
-static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp)
-{
- const unsigned int pdpes = i915_pdpes_per_pdp(vm);
- int i;
-
- for (i = 0; i < pdpes; i++) {
- if (pdp->page_directory[i] == vm->scratch_pd)
- continue;
-
- gen8_free_page_tables(vm, pdp->page_directory[i]);
- free_pd(vm, pdp->page_directory[i]);
- }
-
- free_pdp(vm, pdp);
-}
-
-static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
-{
- int i;
-
- for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
- if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp)
- continue;
-
- gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]);
- }
-
- cleanup_px(&ppgtt->vm, &ppgtt->pml4);
-}
-
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
{
- struct drm_i915_private *dev_priv = vm->i915;
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ struct drm_i915_private *i915 = gt->i915;
- if (intel_vgpu_active(dev_priv))
- gen8_ppgtt_notify_vgt(ppgtt, false);
-
- if (use_4lvl(vm))
- gen8_ppgtt_cleanup_4lvl(ppgtt);
- else
- gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp);
-
- gen8_free_scratch(vm);
-}
-
-static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
- struct i915_page_directory *pd,
- u64 start, u64 length)
-{
- struct i915_page_table *pt;
- u64 from = start;
- unsigned int pde;
-
- gen8_for_each_pde(pt, pd, start, length, pde) {
- int count = gen8_pte_count(start, length);
-
- if (pt == vm->scratch_pt) {
- pd->used_pdes++;
-
- pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- pd->used_pdes--;
- goto unwind;
- }
-
- if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
- gen8_initialize_pt(vm, pt);
-
- gen8_ppgtt_set_pde(vm, pd, pt, pde);
- GEM_BUG_ON(pd->used_pdes > I915_PDES);
- }
+ ppgtt->vm.gt = gt;
+ ppgtt->vm.i915 = i915;
+ ppgtt->vm.dma = &i915->drm.pdev->dev;
+ ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
- pt->used_ptes += count;
- }
- return 0;
+ i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
-unwind:
- gen8_ppgtt_clear_pd(vm, pd, from, start - from);
- return -ENOMEM;
+ ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
+ ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
+ ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
+ ppgtt->vm.vma_ops.clear_pages = clear_pages;
}
-static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
- struct i915_page_directory_pointer *pdp,
- u64 start, u64 length)
+static struct i915_page_directory *
+gen8_alloc_top_pd(struct i915_address_space *vm)
{
+ const unsigned int count = gen8_pd_top_count(vm);
struct i915_page_directory *pd;
- u64 from = start;
- unsigned int pdpe;
- int ret;
-
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- if (pd == vm->scratch_pd) {
- pdp->used_pdpes++;
-
- pd = alloc_pd(vm);
- if (IS_ERR(pd)) {
- pdp->used_pdpes--;
- goto unwind;
- }
-
- gen8_initialize_pd(vm, pd);
- gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
- GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
- }
-
- ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
- if (unlikely(ret))
- goto unwind_pd;
- }
-
- return 0;
-
-unwind_pd:
- if (!pd->used_pdes) {
- gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
- GEM_BUG_ON(!pdp->used_pdpes);
- pdp->used_pdpes--;
- free_pd(vm, pd);
- }
-unwind:
- gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
- return -ENOMEM;
-}
-
-static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- return gen8_ppgtt_alloc_pdp(vm,
- &i915_vm_to_ppgtt(vm)->pdp, start, length);
-}
-
-static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct i915_pml4 *pml4 = &ppgtt->pml4;
- struct i915_page_directory_pointer *pdp;
- u64 from = start;
- u32 pml4e;
- int ret;
- gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
- if (pml4->pdps[pml4e] == vm->scratch_pdp) {
- pdp = alloc_pdp(vm);
- if (IS_ERR(pdp))
- goto unwind;
+ GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
- gen8_initialize_pdp(vm, pdp);
- gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
- }
-
- ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
- if (unlikely(ret))
- goto unwind_pdp;
- }
-
- return 0;
-
-unwind_pdp:
- if (!pdp->used_pdpes) {
- gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
- free_pdp(vm, pdp);
- }
-unwind:
- gen8_ppgtt_clear_4lvl(vm, from, start - from);
- return -ENOMEM;
-}
-
-static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
-{
- struct i915_address_space *vm = &ppgtt->vm;
- struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
- struct i915_page_directory *pd;
- u64 start = 0, length = ppgtt->vm.total;
- u64 from = start;
- unsigned int pdpe;
-
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- pd = alloc_pd(vm);
- if (IS_ERR(pd))
- goto unwind;
+ pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+ if (unlikely(!pd))
+ return ERR_PTR(-ENOMEM);
- gen8_initialize_pd(vm, pd);
- gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
- pdp->used_pdpes++;
+ if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+ kfree(pd);
+ return ERR_PTR(-ENOMEM);
}
- pdp->used_pdpes++; /* never remove */
- return 0;
-
-unwind:
- start -= from;
- gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
- gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
- free_pd(vm, pd);
- }
- pdp->used_pdpes = 0;
- return -ENOMEM;
+ fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+ atomic_inc(px_used(pd)); /* mark as pinned */
+ return pd;
}
/*
@@ -1526,28 +1474,27 @@ unwind:
* space.
*
*/
-static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
+static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
{
- struct i915_hw_ppgtt *ppgtt;
+ struct i915_ppgtt *ppgtt;
int err;
ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
if (!ppgtt)
return ERR_PTR(-ENOMEM);
- kref_init(&ppgtt->ref);
+ ppgtt_init(ppgtt, &i915->gt);
+ ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
- ppgtt->vm.i915 = i915;
- ppgtt->vm.dma = &i915->drm.pdev->dev;
-
- ppgtt->vm.total = HAS_FULL_48BIT_PPGTT(i915) ?
- 1ULL << 48 :
- 1ULL << 32;
-
- /* From bdw, there is support for read-only pages in the PPGTT. */
- ppgtt->vm.has_read_only = true;
-
- i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
+ /*
+ * From bdw, there is hw support for read-only pages in the PPGTT.
+ *
+ * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
+ * for now.
+ *
+ * Gen12 has inherited the same read-only fault issue from gen11.
+ */
+ ppgtt->vm.has_read_only = !IS_GEN_RANGE(i915, 11, 12);
/* There are only few exceptions for gen >=6. chv and bxt.
* And we are not sure about the latter so play safe for now.
@@ -1559,55 +1506,42 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
if (err)
goto err_free;
- if (use_4lvl(&ppgtt->vm)) {
- err = setup_px(&ppgtt->vm, &ppgtt->pml4);
- if (err)
- goto err_scratch;
-
- gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4);
+ ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
+ if (IS_ERR(ppgtt->pd)) {
+ err = PTR_ERR(ppgtt->pd);
+ goto err_free_scratch;
+ }
- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
- ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
- ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
- } else {
- err = __pdp_init(&ppgtt->vm, &ppgtt->pdp);
+ if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+ err = gen8_preallocate_top_level_pdp(ppgtt);
if (err)
- goto err_scratch;
-
- if (intel_vgpu_active(i915)) {
- err = gen8_preallocate_top_level_pdp(ppgtt);
- if (err) {
- __pdp_fini(&ppgtt->pdp);
- goto err_scratch;
- }
- }
-
- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
- ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
- ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
+ goto err_free_pd;
}
+ ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+ ppgtt->vm.insert_entries = gen8_ppgtt_insert;
+ ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
+ ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
if (intel_vgpu_active(i915))
gen8_ppgtt_notify_vgt(ppgtt, true);
ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
- ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
- ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
- ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
- ppgtt->vm.vma_ops.clear_pages = clear_pages;
-
return ppgtt;
-err_scratch:
- gen8_free_scratch(&ppgtt->vm);
+err_free_pd:
+ __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+ gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
+err_free_scratch:
+ free_scratch(&ppgtt->vm);
err_free:
kfree(ppgtt);
return ERR_PTR(err);
}
/* Write pde (index) from the page directory @pd to the page table @pt */
-static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
+static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
const unsigned int pde,
const struct i915_page_table *pt)
{
@@ -1616,73 +1550,81 @@ static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
ppgtt->pd_addr + pde);
}
-static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
+static void gen7_ppgtt_enable(struct intel_gt *gt)
{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
- u32 ecochk, ecobits;
enum intel_engine_id id;
+ u32 ecochk;
- ecobits = I915_READ(GAC_ECO_BITS);
- I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
+ intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
- ecochk = I915_READ(GAM_ECOCHK);
- if (IS_HASWELL(dev_priv)) {
+ ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+ if (IS_HASWELL(i915)) {
ecochk |= ECOCHK_PPGTT_WB_HSW;
} else {
ecochk |= ECOCHK_PPGTT_LLC_IVB;
ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
}
- I915_WRITE(GAM_ECOCHK, ecochk);
+ intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
- for_each_engine(engine, dev_priv, id) {
+ for_each_engine(engine, gt, id) {
/* GFX_MODE is per-ring on gen7+ */
- I915_WRITE(RING_MODE_GEN7(engine),
- _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+ ENGINE_WRITE(engine,
+ RING_MODE_GEN7,
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}
}
-static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
+static void gen6_ppgtt_enable(struct intel_gt *gt)
{
- u32 ecochk, gab_ctl, ecobits;
+ struct intel_uncore *uncore = gt->uncore;
- ecobits = I915_READ(GAC_ECO_BITS);
- I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
- ECOBITS_PPGTT_CACHE64B);
+ intel_uncore_rmw(uncore,
+ GAC_ECO_BITS,
+ 0,
+ ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
- gab_ctl = I915_READ(GAB_CTL);
- I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
+ intel_uncore_rmw(uncore,
+ GAB_CTL,
+ 0,
+ GAB_CTL_CONT_AFTER_PAGEFAULT);
- ecochk = I915_READ(GAM_ECOCHK);
- I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
+ intel_uncore_rmw(uncore,
+ GAM_ECOCHK,
+ 0,
+ ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
- if (HAS_PPGTT(dev_priv)) /* may be disabled for VT-d */
- I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+ if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
+ intel_uncore_write(uncore,
+ GFX_MODE,
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}
/* PPGTT support for Sandybdrige/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
u64 start, u64 length)
{
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
- unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+ const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ const gen6_pte_t scratch_pte = vm->scratch[0].encode;
unsigned int pde = first_entry / GEN6_PTES;
unsigned int pte = first_entry % GEN6_PTES;
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
- const gen6_pte_t scratch_pte = vm->scratch_pte;
while (num_entries) {
- struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
- const unsigned int end = min(pte + num_entries, GEN6_PTES);
- const unsigned int count = end - pte;
+ struct i915_page_table * const pt =
+ i915_pt_entry(ppgtt->base.pd, pde++);
+ const unsigned int count = min(num_entries, GEN6_PTES - pte);
gen6_pte_t *vaddr;
- GEM_BUG_ON(pt == vm->scratch_pt);
+ GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
num_entries -= count;
- GEM_BUG_ON(count > pt->used_ptes);
- pt->used_ptes -= count;
- if (!pt->used_ptes)
+ GEM_BUG_ON(count > atomic_read(&pt->used));
+ if (!atomic_sub_return(count, &pt->used))
ppgtt->scan_for_unused_pt = true;
/*
@@ -1693,9 +1635,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
*/
vaddr = kmap_atomic_px(pt);
- do {
- vaddr[pte++] = scratch_pte;
- } while (pte < end);
+ memset32(vaddr + pte, scratch_pte, count);
kunmap_atomic(vaddr);
pte = 0;
@@ -1707,7 +1647,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
enum i915_cache_level cache_level,
u32 flags)
{
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ struct i915_page_directory * const pd = ppgtt->pd;
unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
unsigned act_pt = first_entry / GEN6_PTES;
unsigned act_pte = first_entry % GEN6_PTES;
@@ -1715,9 +1656,9 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma);
gen6_pte_t *vaddr;
- GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt);
+ GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
- vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
@@ -1733,7 +1674,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
if (++act_pte == GEN6_PTES) {
kunmap_atomic(vaddr);
- vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
act_pte = 0;
}
} while (1);
@@ -1745,99 +1686,115 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
static int gen6_alloc_va_range(struct i915_address_space *vm,
u64 start, u64 length)
{
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
- struct i915_page_table *pt;
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_table *pt, *alloc = NULL;
+ intel_wakeref_t wakeref;
u64 from = start;
unsigned int pde;
bool flush = false;
+ int ret = 0;
+
+ wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
- gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
+ spin_lock(&pd->lock);
+ gen6_for_each_pde(pt, pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
- if (pt == vm->scratch_pt) {
- pt = alloc_pt(vm);
- if (IS_ERR(pt))
+ if (px_base(pt) == px_base(&vm->scratch[1])) {
+ spin_unlock(&pd->lock);
+
+ pt = fetch_and_zero(&alloc);
+ if (!pt)
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
goto unwind_out;
+ }
- gen6_initialize_pt(vm, pt);
- ppgtt->base.pd.page_table[pde] = pt;
+ fill32_px(pt, vm->scratch[0].encode);
- if (i915_vma_is_bound(ppgtt->vma,
- I915_VMA_GLOBAL_BIND)) {
- gen6_write_pde(ppgtt, pde, pt);
- flush = true;
+ spin_lock(&pd->lock);
+ if (pd->entry[pde] == &vm->scratch[1]) {
+ pd->entry[pde] = pt;
+ if (i915_vma_is_bound(ppgtt->vma,
+ I915_VMA_GLOBAL_BIND)) {
+ gen6_write_pde(ppgtt, pde, pt);
+ flush = true;
+ }
+ } else {
+ alloc = pt;
+ pt = pd->entry[pde];
}
-
- GEM_BUG_ON(pt->used_ptes);
}
- pt->used_ptes += count;
+ atomic_add(count, &pt->used);
}
+ spin_unlock(&pd->lock);
- if (flush) {
- mark_tlbs_dirty(&ppgtt->base);
- gen6_ggtt_invalidate(ppgtt->base.vm.i915);
- }
+ if (flush)
+ gen6_ggtt_invalidate(vm->gt->ggtt);
- return 0;
+ goto out;
unwind_out:
gen6_ppgtt_clear_range(vm, from, start - from);
- return -ENOMEM;
+out:
+ if (alloc)
+ free_px(vm, alloc);
+ intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+ return ret;
}
-static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
+static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
struct i915_address_space * const vm = &ppgtt->base.vm;
- struct i915_page_table *unused;
- u32 pde;
+ struct i915_page_directory * const pd = ppgtt->base.pd;
int ret;
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
if (ret)
return ret;
- vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
- I915_CACHE_NONE,
- PTE_READ_ONLY);
+ vm->scratch[0].encode =
+ vm->pte_encode(px_dma(&vm->scratch[0]),
+ I915_CACHE_NONE, PTE_READ_ONLY);
- vm->scratch_pt = alloc_pt(vm);
- if (IS_ERR(vm->scratch_pt)) {
+ if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
cleanup_scratch_page(vm);
- return PTR_ERR(vm->scratch_pt);
+ return -ENOMEM;
}
- gen6_initialize_pt(vm, vm->scratch_pt);
- gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
- ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
+ fill32_px(&vm->scratch[1], vm->scratch[0].encode);
+ memset_p(pd->entry, &vm->scratch[1], I915_PDES);
return 0;
}
-static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
-{
- free_pt(vm, vm->scratch_pt);
- cleanup_scratch_page(vm);
-}
-
-static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt)
+static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_dma * const scratch =
+ px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
u32 pde;
- gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
- if (pt != ppgtt->base.vm.scratch_pt)
- free_pt(&ppgtt->base.vm, pt);
+ gen6_for_all_pdes(pt, pd, pde)
+ if (px_base(pt) != scratch)
+ free_px(&ppgtt->base.vm, pt);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
i915_vma_destroy(ppgtt->vma);
gen6_ppgtt_free_pd(ppgtt);
- gen6_ppgtt_free_scratch(vm);
+ free_scratch(vm);
+
+ mutex_destroy(&ppgtt->pin_mutex);
+ kfree(ppgtt->base.pd);
}
static int pd_vma_set_pages(struct i915_vma *vma)
@@ -1858,27 +1815,28 @@ static int pd_vma_bind(struct i915_vma *vma,
u32 unused)
{
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
- struct gen6_hw_ppgtt *ppgtt = vma->private;
+ struct gen6_ppgtt *ppgtt = vma->private;
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
struct i915_page_table *pt;
unsigned int pde;
- ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+ px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
- gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
+ gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
gen6_write_pde(ppgtt, pde, pt);
- mark_tlbs_dirty(&ppgtt->base);
- gen6_ggtt_invalidate(ppgtt->base.vm.i915);
+ gen6_ggtt_invalidate(ggtt);
return 0;
}
static void pd_vma_unbind(struct i915_vma *vma)
{
- struct gen6_hw_ppgtt *ppgtt = vma->private;
- struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
+ struct gen6_ppgtt *ppgtt = vma->private;
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_dma * const scratch =
+ px_base(&ppgtt->base.vm.scratch[1]);
struct i915_page_table *pt;
unsigned int pde;
@@ -1886,12 +1844,12 @@ static void pd_vma_unbind(struct i915_vma *vma)
return;
/* Free all no longer used page tables */
- gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
- if (pt->used_ptes || pt == scratch_pt)
+ gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
+ if (px_base(pt) == scratch || atomic_read(&pt->used))
continue;
- free_pt(&ppgtt->base.vm, pt);
- ppgtt->base.pd.page_table[pde] = scratch_pt;
+ free_px(&ppgtt->base.vm, pt);
+ pd->entry[pde] = scratch;
}
ppgtt->scan_for_unused_pt = false;
@@ -1904,44 +1862,42 @@ static const struct i915_vma_ops pd_vma_ops = {
.unbind_vma = pd_vma_unbind,
};
-static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
+static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
{
- struct drm_i915_private *i915 = ppgtt->base.vm.i915;
- struct i915_ggtt *ggtt = &i915->ggtt;
+ struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
struct i915_vma *vma;
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(size > ggtt->vm.total);
- vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
+ vma = i915_vma_alloc();
if (!vma)
return ERR_PTR(-ENOMEM);
- i915_active_init(i915, &vma->active, NULL);
- INIT_ACTIVE_REQUEST(&vma->last_fence);
+ i915_active_init(&vma->active, NULL, NULL);
- vma->vm = &ggtt->vm;
+ mutex_init(&vma->pages_mutex);
+ vma->vm = i915_vm_get(&ggtt->vm);
vma->ops = &pd_vma_ops;
vma->private = ppgtt;
vma->size = size;
vma->fence_size = size;
- vma->flags = I915_VMA_GGTT;
+ atomic_set(&vma->flags, I915_VMA_GGTT);
vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
INIT_LIST_HEAD(&vma->obj_link);
-
- mutex_lock(&vma->vm->mutex);
- list_add(&vma->vm_link, &vma->vm->unbound_list);
- mutex_unlock(&vma->vm->mutex);
+ INIT_LIST_HEAD(&vma->closed_link);
return vma;
}
-int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
+int gen6_ppgtt_pin(struct i915_ppgtt *base)
{
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
- int err;
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+ int err = 0;
+
+ GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
/*
* Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
@@ -1949,72 +1905,81 @@ int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
* (When vma->pin_count becomes atomic, I expect we will naturally
* need a larger, unpacked, type and kill this redundancy.)
*/
- if (ppgtt->pin_count++)
+ if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
return 0;
+ if (mutex_lock_interruptible(&ppgtt->pin_mutex))
+ return -EINTR;
+
/*
* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
* allocator works in address space sizes, so it's multiplied by page
* size. We allocate at the top of the GTT to avoid fragmentation.
*/
- err = i915_vma_pin(ppgtt->vma,
- 0, GEN6_PD_ALIGN,
- PIN_GLOBAL | PIN_HIGH);
- if (err)
- goto unpin;
-
- return 0;
+ if (!atomic_read(&ppgtt->pin_count)) {
+ err = i915_vma_pin(ppgtt->vma,
+ 0, GEN6_PD_ALIGN,
+ PIN_GLOBAL | PIN_HIGH);
+ }
+ if (!err)
+ atomic_inc(&ppgtt->pin_count);
+ mutex_unlock(&ppgtt->pin_mutex);
-unpin:
- ppgtt->pin_count = 0;
return err;
}
-void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+ GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
+ if (atomic_dec_and_test(&ppgtt->pin_count))
+ i915_vma_unpin(ppgtt->vma);
+}
+
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
{
- struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
- GEM_BUG_ON(!ppgtt->pin_count);
- if (--ppgtt->pin_count)
+ if (!atomic_read(&ppgtt->pin_count))
return;
i915_vma_unpin(ppgtt->vma);
+ atomic_set(&ppgtt->pin_count, 0);
}
-static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
+static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
{
struct i915_ggtt * const ggtt = &i915->ggtt;
- struct gen6_hw_ppgtt *ppgtt;
+ struct gen6_ppgtt *ppgtt;
int err;
ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
if (!ppgtt)
return ERR_PTR(-ENOMEM);
- kref_init(&ppgtt->base.ref);
+ mutex_init(&ppgtt->pin_mutex);
- ppgtt->base.vm.i915 = i915;
- ppgtt->base.vm.dma = &i915->drm.pdev->dev;
-
- ppgtt->base.vm.total = I915_PDES * GEN6_PTES * I915_GTT_PAGE_SIZE;
-
- i915_address_space_init(&ppgtt->base.vm, VM_CLASS_PPGTT);
+ ppgtt_init(&ppgtt->base, &i915->gt);
+ ppgtt->base.vm.top = 1;
+ ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
- ppgtt->base.vm.vma_ops.bind_vma = ppgtt_bind_vma;
- ppgtt->base.vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
- ppgtt->base.vm.vma_ops.set_pages = ppgtt_set_pages;
- ppgtt->base.vm.vma_ops.clear_pages = clear_pages;
-
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
+ ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+ if (!ppgtt->base.pd) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
err = gen6_ppgtt_init_scratch(ppgtt);
if (err)
- goto err_free;
+ goto err_pd;
ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
if (IS_ERR(ppgtt->vma)) {
@@ -2025,27 +1990,40 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
return &ppgtt->base;
err_scratch:
- gen6_ppgtt_free_scratch(&ppgtt->base.vm);
+ free_scratch(&ppgtt->base.vm);
+err_pd:
+ kfree(ppgtt->base.pd);
err_free:
kfree(ppgtt);
return ERR_PTR(err);
}
-static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
+static void gtt_write_workarounds(struct intel_gt *gt)
{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+
/* This function is for gtt related workarounds. This function is
* called on driver load and after a GPU reset, so you can place
* workarounds here even if they get overwritten by GPU reset.
*/
/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
- if (IS_BROADWELL(dev_priv))
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
- else if (IS_CHERRYVIEW(dev_priv))
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
- else if (IS_GEN9_LP(dev_priv))
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
- else if (INTEL_GEN(dev_priv) >= 9)
- I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
+ if (IS_BROADWELL(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
+ else if (IS_CHERRYVIEW(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
+ else if (IS_GEN9_LP(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+ else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
/*
* To support 64K PTEs we need to first enable the use of the
@@ -2058,27 +2036,51 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
* 32K pages, but we don't currently have any support for it in our
* driver.
*/
- if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
- INTEL_GEN(dev_priv) <= 10)
- I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
- I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
- GAMW_ECO_ENABLE_64K_IPS_FIELD);
+ if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
+ INTEL_GEN(i915) <= 10)
+ intel_uncore_rmw(uncore,
+ GEN8_GAMW_ECO_DEV_RW_IA,
+ 0,
+ GAMW_ECO_ENABLE_64K_IPS_FIELD);
+
+ if (IS_GEN_RANGE(i915, 8, 11)) {
+ bool can_use_gtt_cache = true;
+
+ /*
+ * According to the BSpec if we use 2M/1G pages then we also
+ * need to disable the GTT cache. At least on BDW we can see
+ * visual corruption when using 2M pages, and not disabling the
+ * GTT cache.
+ */
+ if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
+ can_use_gtt_cache = false;
+
+ /* WaGttCachingOffByDefault */
+ intel_uncore_write(uncore,
+ HSW_GTT_CACHE_EN,
+ can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
+ WARN_ON_ONCE(can_use_gtt_cache &&
+ intel_uncore_read(uncore,
+ HSW_GTT_CACHE_EN) == 0);
+ }
}
-int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
+int i915_ppgtt_init_hw(struct intel_gt *gt)
{
- gtt_write_workarounds(dev_priv);
+ struct drm_i915_private *i915 = gt->i915;
+
+ gtt_write_workarounds(gt);
- if (IS_GEN(dev_priv, 6))
- gen6_ppgtt_enable(dev_priv);
- else if (IS_GEN(dev_priv, 7))
- gen7_ppgtt_enable(dev_priv);
+ if (IS_GEN(i915, 6))
+ gen6_ppgtt_enable(gt);
+ else if (IS_GEN(i915, 7))
+ gen7_ppgtt_enable(gt);
return 0;
}
-static struct i915_hw_ppgtt *
-__hw_ppgtt_create(struct drm_i915_private *i915)
+static struct i915_ppgtt *
+__ppgtt_create(struct drm_i915_private *i915)
{
if (INTEL_GEN(i915) < 8)
return gen6_ppgtt_create(i915);
@@ -2086,63 +2088,20 @@ __hw_ppgtt_create(struct drm_i915_private *i915)
return gen8_ppgtt_create(i915);
}
-struct i915_hw_ppgtt *
-i915_ppgtt_create(struct drm_i915_private *i915,
- struct drm_i915_file_private *fpriv)
+struct i915_ppgtt *
+i915_ppgtt_create(struct drm_i915_private *i915)
{
- struct i915_hw_ppgtt *ppgtt;
+ struct i915_ppgtt *ppgtt;
- ppgtt = __hw_ppgtt_create(i915);
+ ppgtt = __ppgtt_create(i915);
if (IS_ERR(ppgtt))
return ppgtt;
- ppgtt->vm.file = fpriv;
-
trace_i915_ppgtt_create(&ppgtt->vm);
return ppgtt;
}
-void i915_ppgtt_close(struct i915_address_space *vm)
-{
- GEM_BUG_ON(vm->closed);
- vm->closed = true;
-}
-
-static void ppgtt_destroy_vma(struct i915_address_space *vm)
-{
- struct list_head *phases[] = {
- &vm->bound_list,
- &vm->unbound_list,
- NULL,
- }, **phase;
-
- vm->closed = true;
- for (phase = phases; *phase; phase++) {
- struct i915_vma *vma, *vn;
-
- list_for_each_entry_safe(vma, vn, *phase, vm_link)
- i915_vma_destroy(vma);
- }
-}
-
-void i915_ppgtt_release(struct kref *kref)
-{
- struct i915_hw_ppgtt *ppgtt =
- container_of(kref, struct i915_hw_ppgtt, ref);
-
- trace_i915_ppgtt_release(&ppgtt->vm);
-
- ppgtt_destroy_vma(&ppgtt->vm);
-
- GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
- GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
-
- ppgtt->vm.cleanup(&ppgtt->vm);
- i915_address_space_fini(&ppgtt->vm);
- kfree(ppgtt);
-}
-
/* Certain Gen5 chipsets require require idling the GPU before
* unmapping anything from the GTT when VT-d is enabled.
*/
@@ -2154,84 +2113,26 @@ static bool needs_idle_maps(struct drm_i915_private *dev_priv)
return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
}
-static void gen6_check_faults(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- u32 fault;
-
- for_each_engine(engine, dev_priv, id) {
- fault = I915_READ(RING_FAULT_REG(engine));
- if (fault & RING_FAULT_VALID) {
- DRM_DEBUG_DRIVER("Unexpected fault\n"
- "\tAddr: 0x%08lx\n"
- "\tAddress space: %s\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- fault & PAGE_MASK,
- fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
- }
-}
-
-static void gen8_check_faults(struct drm_i915_private *dev_priv)
-{
- u32 fault = I915_READ(GEN8_RING_FAULT_REG);
-
- if (fault & RING_FAULT_VALID) {
- u32 fault_data0, fault_data1;
- u64 fault_addr;
-
- fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
- fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
- fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
- ((u64)fault_data0 << 12);
-
- DRM_DEBUG_DRIVER("Unexpected fault\n"
- "\tAddr: 0x%08x_%08x\n"
- "\tAddress space: %s\n"
- "\tEngine ID: %d\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- upper_32_bits(fault_addr),
- lower_32_bits(fault_addr),
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
- GEN8_RING_FAULT_ENGINE_ID(fault),
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
-}
-
-void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
-{
- /* From GEN8 onwards we only have one 'All Engine Fault Register' */
- if (INTEL_GEN(dev_priv) >= 8)
- gen8_check_faults(dev_priv);
- else if (INTEL_GEN(dev_priv) >= 6)
- gen6_check_faults(dev_priv);
- else
- return;
-
- i915_clear_error_registers(dev_priv);
-}
-
-void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
+static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct drm_i915_private *i915 = ggtt->vm.i915;
/* Don't bother messing with faults pre GEN6 as we have little
* documentation supporting that it's a good idea.
*/
- if (INTEL_GEN(dev_priv) < 6)
+ if (INTEL_GEN(i915) < 6)
return;
- i915_check_and_clear_faults(dev_priv);
+ intel_gt_check_and_clear_faults(ggtt->vm.gt);
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
- i915_ggtt_invalidate(dev_priv);
+ ggtt->invalidate(ggtt);
+}
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
+{
+ ggtt_suspend_mappings(&i915->ggtt);
}
int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
@@ -2277,7 +2178,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
- ggtt->invalidate(vm->i915);
+ ggtt->invalidate(ggtt);
}
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
@@ -2298,14 +2199,14 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
- for_each_sgt_dma(addr, sgt_iter, vma->pages)
+ for_each_sgt_daddr(addr, sgt_iter, vma->pages)
gen8_set_pte(gtt_entries++, pte_encode | addr);
/*
* We want to flush the TLBs only after we're certain all the PTE
* updates have finished.
*/
- ggtt->invalidate(vm->i915);
+ ggtt->invalidate(ggtt);
}
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
@@ -2320,7 +2221,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
iowrite32(vm->pte_encode(addr, level, flags), pte);
- ggtt->invalidate(vm->i915);
+ ggtt->invalidate(ggtt);
}
/*
@@ -2339,14 +2240,14 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
struct sgt_iter iter;
dma_addr_t addr;
- for_each_sgt_dma(addr, iter, vma->pages)
+ for_each_sgt_daddr(addr, iter, vma->pages)
iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
/*
* We want to flush the TLBs only after we're certain all the PTE
* updates have finished.
*/
- ggtt->invalidate(vm->i915);
+ ggtt->invalidate(ggtt);
}
static void nop_clear_range(struct i915_address_space *vm,
@@ -2360,7 +2261,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
unsigned first_entry = start / I915_GTT_PAGE_SIZE;
unsigned num_entries = length / I915_GTT_PAGE_SIZE;
- const gen8_pte_t scratch_pte = vm->scratch_pte;
+ const gen8_pte_t scratch_pte = vm->scratch[0].encode;
gen8_pte_t __iomem *gtt_base =
(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
const int max_entries = ggtt_total_entries(ggtt) - first_entry;
@@ -2485,8 +2386,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries))
num_entries = max_entries;
- scratch_pte = vm->scratch_pte;
-
+ scratch_pte = vm->scratch[0].encode;
for (i = 0; i < num_entries; i++)
iowrite32(scratch_pte, &gtt_base[i]);
}
@@ -2535,7 +2435,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
if (i915_gem_object_is_readonly(obj))
pte_flags |= PTE_READ_ONLY;
- with_intel_runtime_pm(i915, wakeref)
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -2545,7 +2445,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
* GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
* upgrade to both bound if we bind either to avoid double-binding.
*/
- vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
+ atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
return 0;
}
@@ -2555,7 +2455,7 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
struct drm_i915_private *i915 = vma->vm->i915;
intel_wakeref_t wakeref;
- with_intel_runtime_pm(i915, wakeref)
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}
@@ -2573,24 +2473,28 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
pte_flags |= PTE_READ_ONLY;
if (flags & I915_VMA_LOCAL_BIND) {
- struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
+ struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
- if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
- ret = appgtt->vm.allocate_va_range(&appgtt->vm,
- vma->node.start,
- vma->size);
+ if (flags & I915_VMA_ALLOC) {
+ ret = alias->vm.allocate_va_range(&alias->vm,
+ vma->node.start,
+ vma->size);
if (ret)
return ret;
+
+ set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
}
- appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level,
- pte_flags);
+ GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+ __i915_vma_flags(vma)));
+ alias->vm.insert_entries(&alias->vm, vma,
+ cache_level, pte_flags);
}
if (flags & I915_VMA_GLOBAL_BIND) {
intel_wakeref_t wakeref;
- with_intel_runtime_pm(i915, wakeref) {
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
vma->vm->insert_entries(vma->vm, vma,
cache_level, pte_flags);
}
@@ -2603,16 +2507,17 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
{
struct drm_i915_private *i915 = vma->vm->i915;
- if (vma->flags & I915_VMA_GLOBAL_BIND) {
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
struct i915_address_space *vm = vma->vm;
intel_wakeref_t wakeref;
- with_intel_runtime_pm(i915, wakeref)
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
vm->clear_range(vm, vma->node.start, vma->size);
}
- if (vma->flags & I915_VMA_LOCAL_BIND) {
- struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm;
+ if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
+ struct i915_address_space *vm =
+ &i915_vm_to_ggtt(vma->vm)->alias->vm;
vm->clear_range(vm, vma->node.start, vma->size);
}
@@ -2626,7 +2531,9 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
struct i915_ggtt *ggtt = &dev_priv->ggtt;
if (unlikely(ggtt->do_idle_maps)) {
- if (i915_gem_wait_for_idle(dev_priv, 0, MAX_SCHEDULE_TIMEOUT)) {
+ /* XXX This does not prevent more requests being submitted! */
+ if (intel_gt_retire_requests_timeout(ggtt->vm.gt,
+ -MAX_SCHEDULE_TIMEOUT)) {
DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
/* Wait a bit, in hopes it avoids the hang */
udelay(10);
@@ -2651,12 +2558,12 @@ static int ggtt_set_pages(struct i915_vma *vma)
return 0;
}
-static void i915_gtt_color_adjust(const struct drm_mm_node *node,
- unsigned long color,
- u64 *start,
- u64 *end)
+static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
+ unsigned long color,
+ u64 *start,
+ u64 *end)
{
- if (node->allocated && node->color != color)
+ if (i915_node_color_differs(node, color))
*start += I915_GTT_PAGE_SIZE;
/* Also leave a space between the unallocated reserved node after the
@@ -2669,13 +2576,12 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node,
*end -= I915_GTT_PAGE_SIZE;
}
-int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
+static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
- struct i915_hw_ppgtt *ppgtt;
+ struct i915_ppgtt *ppgtt;
int err;
- ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM));
+ ppgtt = i915_ppgtt_create(ggtt->vm.i915);
if (IS_ERR(ppgtt))
return PTR_ERR(ppgtt);
@@ -2694,7 +2600,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
if (err)
goto err_ppgtt;
- i915->mm.aliasing_ppgtt = ppgtt;
+ ggtt->alias = ppgtt;
+ ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
@@ -2705,26 +2612,58 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
return 0;
err_ppgtt:
- i915_ppgtt_put(ppgtt);
+ i915_vm_put(&ppgtt->vm);
return err;
}
-void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
+static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
{
- struct i915_ggtt *ggtt = &i915->ggtt;
- struct i915_hw_ppgtt *ppgtt;
+ struct i915_ppgtt *ppgtt;
- ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
+ ppgtt = fetch_and_zero(&ggtt->alias);
if (!ppgtt)
return;
- i915_ppgtt_put(ppgtt);
+ i915_vm_put(&ppgtt->vm);
ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
}
-int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
+static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
+{
+ u64 size;
+ int ret;
+
+ if (!USES_GUC(ggtt->vm.i915))
+ return 0;
+
+ GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
+ size = ggtt->vm.total - GUC_GGTT_TOP;
+
+ ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
+ GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
+ PIN_NOEVICT);
+ if (ret)
+ DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+
+ return ret;
+}
+
+static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
+{
+ if (drm_mm_node_allocated(&ggtt->uc_fw))
+ drm_mm_remove_node(&ggtt->uc_fw);
+}
+
+static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
+{
+ ggtt_release_guc_top(ggtt);
+ if (drm_mm_node_allocated(&ggtt->error_capture))
+ drm_mm_remove_node(&ggtt->error_capture);
+}
+
+static int init_ggtt(struct i915_ggtt *ggtt)
{
/* Let GEM Manage all of the aperture.
*
@@ -2735,7 +2674,6 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
* aperture. One page should be enough to keep any prefetching inside
* of the aperture.
*/
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
unsigned long hole_start, hole_end;
struct drm_mm_node *entry;
int ret;
@@ -2747,19 +2685,30 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
* why.
*/
ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
- intel_guc_reserved_gtt_size(&dev_priv->guc));
+ intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
- ret = intel_vgt_balloon(dev_priv);
+ ret = intel_vgt_balloon(ggtt);
if (ret)
return ret;
- /* Reserve a mappable slot for our lockless error capture */
- ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
- PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
- 0, ggtt->mappable_end,
- DRM_MM_INSERT_LOW);
+ if (ggtt->mappable_end) {
+ /* Reserve a mappable slot for our lockless error capture */
+ ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture,
+ PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * The upper portion of the GuC address space has a sizeable hole
+ * (several MB) that is inaccessible by GuC. Reserve this range within
+ * GGTT as it can comfortably hold GuC/HuC firmware images.
+ */
+ ret = ggtt_reserve_guc_top(ggtt);
if (ret)
- return ret;
+ goto err;
/* Clear any non-preallocated blocks */
drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
@@ -2772,59 +2721,78 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
/* And finally clear the reserved guard page */
ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
- if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) {
- ret = i915_gem_init_aliasing_ppgtt(dev_priv);
- if (ret)
- goto err;
- }
-
return 0;
err:
- drm_mm_remove_node(&ggtt->error_capture);
+ cleanup_init_ggtt(ggtt);
return ret;
}
-/**
- * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
- * @dev_priv: i915 device
- */
-void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
+int i915_init_ggtt(struct drm_i915_private *i915)
+{
+ int ret;
+
+ ret = init_ggtt(&i915->ggtt);
+ if (ret)
+ return ret;
+
+ if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
+ ret = init_aliasing_ppgtt(&i915->ggtt);
+ if (ret)
+ cleanup_init_ggtt(&i915->ggtt);
+ }
+
+ return 0;
+}
+
+static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct i915_vma *vma, *vn;
- struct pagevec *pvec;
- ggtt->vm.closed = true;
+ atomic_set(&ggtt->vm.open, 0);
- mutex_lock(&dev_priv->drm.struct_mutex);
- i915_gem_fini_aliasing_ppgtt(dev_priv);
+ rcu_barrier(); /* flush the RCU'ed__i915_vm_release */
+ flush_workqueue(ggtt->vm.i915->wq);
+
+ mutex_lock(&ggtt->vm.mutex);
list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
- WARN_ON(i915_vma_unbind(vma));
+ WARN_ON(__i915_vma_unbind(vma));
if (drm_mm_node_allocated(&ggtt->error_capture))
drm_mm_remove_node(&ggtt->error_capture);
- if (drm_mm_initialized(&ggtt->vm.mm)) {
- intel_vgt_deballoon(dev_priv);
- i915_address_space_fini(&ggtt->vm);
- }
+ ggtt_release_guc_top(ggtt);
+ intel_vgt_deballoon(ggtt);
ggtt->vm.cleanup(&ggtt->vm);
- pvec = &dev_priv->mm.wc_stash.pvec;
+ mutex_unlock(&ggtt->vm.mutex);
+ i915_address_space_fini(&ggtt->vm);
+
+ arch_phys_wc_del(ggtt->mtrr);
+
+ if (ggtt->iomap.size)
+ io_mapping_fini(&ggtt->iomap);
+}
+
+/**
+ * i915_ggtt_driver_release - Clean up GGTT hardware initialization
+ * @i915: i915 device
+ */
+void i915_ggtt_driver_release(struct drm_i915_private *i915)
+{
+ struct pagevec *pvec;
+
+ fini_aliasing_ppgtt(&i915->ggtt);
+
+ ggtt_cleanup_hw(&i915->ggtt);
+
+ pvec = &i915->mm.wc_stash.pvec;
if (pvec->nr) {
set_pages_array_wb(pvec->pages, pvec->nr);
__pagevec_release(pvec);
}
-
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
- arch_phys_wc_del(ggtt->mtrr);
- io_mapping_fini(&ggtt->iomap);
-
- i915_gem_cleanup_stolen(dev_priv);
}
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -2895,243 +2863,77 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
return ret;
}
- ggtt->vm.scratch_pte =
- ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr,
+ ggtt->vm.scratch[0].encode =
+ ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
I915_CACHE_NONE, 0);
return 0;
}
-static struct intel_ppat_entry *
-__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
-{
- struct intel_ppat_entry *entry = &ppat->entries[index];
-
- GEM_BUG_ON(index >= ppat->max_entries);
- GEM_BUG_ON(test_bit(index, ppat->used));
-
- entry->ppat = ppat;
- entry->value = value;
- kref_init(&entry->ref);
- set_bit(index, ppat->used);
- set_bit(index, ppat->dirty);
-
- return entry;
-}
-
-static void __free_ppat_entry(struct intel_ppat_entry *entry)
-{
- struct intel_ppat *ppat = entry->ppat;
- unsigned int index = entry - ppat->entries;
-
- GEM_BUG_ON(index >= ppat->max_entries);
- GEM_BUG_ON(!test_bit(index, ppat->used));
-
- entry->value = ppat->clear_value;
- clear_bit(index, ppat->used);
- set_bit(index, ppat->dirty);
-}
-
-/**
- * intel_ppat_get - get a usable PPAT entry
- * @i915: i915 device instance
- * @value: the PPAT value required by the caller
- *
- * The function tries to search if there is an existing PPAT entry which
- * matches with the required value. If perfectly matched, the existing PPAT
- * entry will be used. If only partially matched, it will try to check if
- * there is any available PPAT index. If yes, it will allocate a new PPAT
- * index for the required entry and update the HW. If not, the partially
- * matched entry will be used.
- */
-const struct intel_ppat_entry *
-intel_ppat_get(struct drm_i915_private *i915, u8 value)
-{
- struct intel_ppat *ppat = &i915->ppat;
- struct intel_ppat_entry *entry = NULL;
- unsigned int scanned, best_score;
- int i;
-
- GEM_BUG_ON(!ppat->max_entries);
-
- scanned = best_score = 0;
- for_each_set_bit(i, ppat->used, ppat->max_entries) {
- unsigned int score;
-
- score = ppat->match(ppat->entries[i].value, value);
- if (score > best_score) {
- entry = &ppat->entries[i];
- if (score == INTEL_PPAT_PERFECT_MATCH) {
- kref_get(&entry->ref);
- return entry;
- }
- best_score = score;
- }
- scanned++;
- }
-
- if (scanned == ppat->max_entries) {
- if (!entry)
- return ERR_PTR(-ENOSPC);
-
- kref_get(&entry->ref);
- return entry;
- }
-
- i = find_first_zero_bit(ppat->used, ppat->max_entries);
- entry = __alloc_ppat_entry(ppat, i, value);
- ppat->update_hw(i915);
- return entry;
-}
-
-static void release_ppat(struct kref *kref)
-{
- struct intel_ppat_entry *entry =
- container_of(kref, struct intel_ppat_entry, ref);
- struct drm_i915_private *i915 = entry->ppat->i915;
-
- __free_ppat_entry(entry);
- entry->ppat->update_hw(i915);
-}
-
-/**
- * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
- * @entry: an intel PPAT entry
- *
- * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of the
- * entry is dynamically allocated, its reference count will be decreased. Once
- * the reference count becomes into zero, the PPAT index becomes free again.
- */
-void intel_ppat_put(const struct intel_ppat_entry *entry)
-{
- struct intel_ppat *ppat = entry->ppat;
- unsigned int index = entry - ppat->entries;
-
- GEM_BUG_ON(!ppat->max_entries);
-
- kref_put(&ppat->entries[index].ref, release_ppat);
-}
-
-static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
-{
- struct intel_ppat *ppat = &dev_priv->ppat;
- int i;
-
- for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
- I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
- clear_bit(i, ppat->dirty);
- }
-}
-
-static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
-{
- struct intel_ppat *ppat = &dev_priv->ppat;
- u64 pat = 0;
- int i;
-
- for (i = 0; i < ppat->max_entries; i++)
- pat |= GEN8_PPAT(i, ppat->entries[i].value);
-
- bitmap_clear(ppat->dirty, 0, ppat->max_entries);
-
- I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
- I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
-}
-
-static unsigned int bdw_private_pat_match(u8 src, u8 dst)
-{
- unsigned int score = 0;
- enum {
- AGE_MATCH = BIT(0),
- TC_MATCH = BIT(1),
- CA_MATCH = BIT(2),
- };
-
- /* Cache attribute has to be matched. */
- if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
- return 0;
-
- score |= CA_MATCH;
-
- if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
- score |= TC_MATCH;
-
- if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
- score |= AGE_MATCH;
-
- if (score == (AGE_MATCH | TC_MATCH | CA_MATCH))
- return INTEL_PPAT_PERFECT_MATCH;
-
- return score;
-}
-
-static unsigned int chv_private_pat_match(u8 src, u8 dst)
-{
- return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
- INTEL_PPAT_PERFECT_MATCH : 0;
-}
-
-static void cnl_setup_private_ppat(struct intel_ppat *ppat)
-{
- ppat->max_entries = 8;
- ppat->update_hw = cnl_private_pat_update_hw;
- ppat->match = bdw_private_pat_match;
- ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
-
- __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
- __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
- __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
- __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
- __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
- __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
- __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
- __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+static void tgl_setup_private_ppat(struct intel_uncore *uncore)
+{
+ /* TGL doesn't support LLC or AGE settings */
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
+}
+
+static void cnl_setup_private_ppat(struct intel_uncore *uncore)
+{
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(0),
+ GEN8_PPAT_WB | GEN8_PPAT_LLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(1),
+ GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(2),
+ GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(3),
+ GEN8_PPAT_UC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(4),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(5),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(6),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(7),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
}
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
* bits. When using advanced contexts each context stores its own PAT, but
* writing this data shouldn't be harmful even in those cases. */
-static void bdw_setup_private_ppat(struct intel_ppat *ppat)
-{
- ppat->max_entries = 8;
- ppat->update_hw = bdw_private_pat_update_hw;
- ppat->match = bdw_private_pat_match;
- ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
-
- if (!HAS_PPGTT(ppat->i915)) {
- /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
- * so RTL will always use the value corresponding to
- * pat_sel = 000".
- * So let's disable cache for GGTT to avoid screen corruptions.
- * MOCS still can be used though.
- * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
- * before this patch, i.e. the same uncached + snooping access
- * like on gen6/7 seems to be in effect.
- * - So this just fixes blitter/render access. Again it looks
- * like it's not just uncached access, but uncached + snooping.
- * So we can still hold onto all our assumptions wrt cpu
- * clflushing on LLC machines.
- */
- __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
- return;
- }
+static void bdw_setup_private_ppat(struct intel_uncore *uncore)
+{
+ u64 pat;
+
+ pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
+ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
+ GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
+ GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
+ GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
+ GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
+ GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
+ GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
- __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */
- __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */
- __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
- __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */
- __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
- __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
- __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
- __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
-static void chv_setup_private_ppat(struct intel_ppat *ppat)
+static void chv_setup_private_ppat(struct intel_uncore *uncore)
{
- ppat->max_entries = 8;
- ppat->update_hw = bdw_private_pat_update_hw;
- ppat->match = chv_private_pat_match;
- ppat->clear_value = CHV_PPAT_SNOOP;
+ u64 pat;
/*
* Map WB on BDW to snooped on CHV.
@@ -3152,14 +2954,17 @@ static void chv_setup_private_ppat(struct intel_ppat *ppat)
* in order to keep the global status page working.
*/
- __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
- __alloc_ppat_entry(ppat, 1, 0);
- __alloc_ppat_entry(ppat, 2, 0);
- __alloc_ppat_entry(ppat, 3, 0);
- __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
- __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
- __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
- __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
+ pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(1, 0) |
+ GEN8_PPAT(2, 0) |
+ GEN8_PPAT(3, 0) |
+ GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
static void gen6_gmch_remove(struct i915_address_space *vm)
@@ -3170,29 +2975,26 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
cleanup_scratch_page(vm);
}
-static void setup_private_pat(struct drm_i915_private *dev_priv)
+static void setup_private_pat(struct intel_uncore *uncore)
{
- struct intel_ppat *ppat = &dev_priv->ppat;
- int i;
+ struct drm_i915_private *i915 = uncore->i915;
- ppat->i915 = dev_priv;
+ GEM_BUG_ON(INTEL_GEN(i915) < 8);
- if (INTEL_GEN(dev_priv) >= 10)
- cnl_setup_private_ppat(ppat);
- else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
- chv_setup_private_ppat(ppat);
+ if (INTEL_GEN(i915) >= 12)
+ tgl_setup_private_ppat(uncore);
+ else if (INTEL_GEN(i915) >= 10)
+ cnl_setup_private_ppat(uncore);
+ else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
+ chv_setup_private_ppat(uncore);
else
- bdw_setup_private_ppat(ppat);
-
- GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
-
- for_each_clear_bit(i, ppat->used, ppat->max_entries) {
- ppat->entries[i].value = ppat->clear_value;
- ppat->entries[i].ppat = ppat;
- set_bit(i, ppat->dirty);
- }
+ bdw_setup_private_ppat(uncore);
+}
- ppat->update_hw(dev_priv);
+static struct resource pci_resource(struct pci_dev *pdev, int bar)
+{
+ return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar));
}
static int gen8_gmch_probe(struct i915_ggtt *ggtt)
@@ -3204,10 +3006,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
int err;
/* TODO: We're not aware of mappable constraints on gen8 yet */
- ggtt->gmadr =
- (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2),
- pci_resource_len(pdev, 2));
- ggtt->mappable_end = resource_size(&ggtt->gmadr);
+ if (!IS_DGFX(dev_priv)) {
+ ggtt->gmadr = pci_resource(pdev, 2);
+ ggtt->mappable_end = resource_size(&ggtt->gmadr);
+ }
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
if (!err)
@@ -3237,11 +3039,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
if (ggtt->vm.clear_range != nop_clear_range)
ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
-
- /* Prevent recursively calling stop_machine() and deadlocks. */
- dev_info(dev_priv->drm.dev,
- "Disabling error capture for VT-d workaround\n");
- i915_disable_error_state(dev_priv, -ENODEV);
}
ggtt->invalidate = gen6_ggtt_invalidate;
@@ -3253,7 +3050,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.pte_encode = gen8_pte_encode;
- setup_private_pat(dev_priv);
+ setup_private_pat(ggtt->vm.gt->uncore);
return ggtt_probe_common(ggtt, size);
}
@@ -3289,7 +3086,9 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
size = gen6_get_total_gtt_size(snb_gmch_ctl);
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
- ggtt->vm.clear_range = gen6_ggtt_clear_range;
+ ggtt->vm.clear_range = nop_clear_range;
+ if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
+ ggtt->vm.clear_range = gen6_ggtt_clear_range;
ggtt->vm.insert_page = gen6_ggtt_insert_page;
ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
ggtt->vm.cleanup = gen6_gmch_remove;
@@ -3352,43 +3151,30 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.vma_ops.clear_pages = clear_pages;
if (unlikely(ggtt->do_idle_maps))
- DRM_INFO("applying Ironlake quirks for intel_iommu\n");
+ dev_notice(dev_priv->drm.dev,
+ "Applying Ironlake quirks for intel_iommu\n");
return 0;
}
-/**
- * i915_ggtt_probe_hw - Probe GGTT hardware location
- * @dev_priv: i915 device
- */
-int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
+static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
+ struct drm_i915_private *i915 = gt->i915;
int ret;
- ggtt->vm.i915 = dev_priv;
- ggtt->vm.dma = &dev_priv->drm.pdev->dev;
+ ggtt->vm.gt = gt;
+ ggtt->vm.i915 = i915;
+ ggtt->vm.dma = &i915->drm.pdev->dev;
- if (INTEL_GEN(dev_priv) <= 5)
+ if (INTEL_GEN(i915) <= 5)
ret = i915_gmch_probe(ggtt);
- else if (INTEL_GEN(dev_priv) < 8)
+ else if (INTEL_GEN(i915) < 8)
ret = gen6_gmch_probe(ggtt);
else
ret = gen8_gmch_probe(ggtt);
if (ret)
return ret;
- /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
- * This is easier than doing range restriction on the fly, as we
- * currently don't have any bits spare to pass in this upper
- * restriction!
- */
- if (USES_GUC(dev_priv)) {
- ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP);
- ggtt->mappable_end =
- min_t(u64, ggtt->mappable_end, ggtt->vm.total);
- }
-
if ((ggtt->vm.total - 1) >> 32) {
DRM_ERROR("We never expected a Global GTT with more than 32bits"
" of address space! Found %lldM!\n",
@@ -3410,62 +3196,79 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
DRM_DEBUG_DRIVER("DSM size = %lluM\n",
(u64)resource_size(&intel_graphics_stolen_res) >> 20);
- if (intel_vtd_active())
- DRM_INFO("VT-d active for gfx access\n");
return 0;
}
/**
- * i915_ggtt_init_hw - Initialize GGTT hardware
- * @dev_priv: i915 device
+ * i915_ggtt_probe_hw - Probe GGTT hardware location
+ * @i915: i915 device
*/
-int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
+int i915_ggtt_probe_hw(struct drm_i915_private *i915)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
int ret;
- stash_init(&dev_priv->mm.wc_stash);
+ ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
+ if (ret)
+ return ret;
+
+ if (intel_vtd_active())
+ dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+
+ return 0;
+}
+
+static int ggtt_init_hw(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
- /* Note that we use page colouring to enforce a guard page at the
- * end of the address space. This is required as the CS may prefetch
- * beyond the end of the batch buffer, across the page boundary,
- * and beyond the end of the GTT if we do not provide a guard.
- */
- mutex_lock(&dev_priv->drm.struct_mutex);
i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
ggtt->vm.is_ggtt = true;
/* Only VLV supports read-only GGTT mappings */
- ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
+ ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
- if (!HAS_LLC(dev_priv) && !HAS_PPGTT(dev_priv))
- ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
+ ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
- if (!io_mapping_init_wc(&dev_priv->ggtt.iomap,
- dev_priv->ggtt.gmadr.start,
- dev_priv->ggtt.mappable_end)) {
- ret = -EIO;
- goto out_gtt_cleanup;
+ if (ggtt->mappable_end) {
+ if (!io_mapping_init_wc(&ggtt->iomap,
+ ggtt->gmadr.start,
+ ggtt->mappable_end)) {
+ ggtt->vm.cleanup(&ggtt->vm);
+ return -EIO;
+ }
+
+ ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
+ ggtt->mappable_end);
}
- ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
+ i915_ggtt_init_fences(ggtt);
- /*
- * Initialise stolen early so that we may reserve preallocated
- * objects for the BIOS to KMS transition.
+ return 0;
+}
+
+/**
+ * i915_ggtt_init_hw - Initialize GGTT hardware
+ * @dev_priv: i915 device
+ */
+int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
+{
+ int ret;
+
+ stash_init(&dev_priv->mm.wc_stash);
+
+ /* Note that we use page colouring to enforce a guard page at the
+ * end of the address space. This is required as the CS may prefetch
+ * beyond the end of the batch buffer, across the page boundary,
+ * and beyond the end of the GTT if we do not provide a guard.
*/
- ret = i915_gem_init_stolen(dev_priv);
+ ret = ggtt_init_hw(&dev_priv->ggtt);
if (ret)
- goto out_gtt_cleanup;
+ return ret;
return 0;
-
-out_gtt_cleanup:
- ggtt->vm.cleanup(&ggtt->vm);
- return ret;
}
int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
@@ -3476,76 +3279,82 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
return 0;
}
-void i915_ggtt_enable_guc(struct drm_i915_private *i915)
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
{
- GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
+ GEM_BUG_ON(ggtt->invalidate != gen6_ggtt_invalidate);
- i915->ggtt.invalidate = guc_ggtt_invalidate;
+ ggtt->invalidate = guc_ggtt_invalidate;
- i915_ggtt_invalidate(i915);
+ ggtt->invalidate(ggtt);
}
-void i915_ggtt_disable_guc(struct drm_i915_private *i915)
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
{
/* XXX Temporary pardon for error unload */
- if (i915->ggtt.invalidate == gen6_ggtt_invalidate)
+ if (ggtt->invalidate == gen6_ggtt_invalidate)
return;
/* We should only be called after i915_ggtt_enable_guc() */
- GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
+ GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
- i915->ggtt.invalidate = gen6_ggtt_invalidate;
+ ggtt->invalidate = gen6_ggtt_invalidate;
- i915_ggtt_invalidate(i915);
+ ggtt->invalidate(ggtt);
}
-void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
+static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
{
- struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct i915_vma *vma, *vn;
+ bool flush = false;
+ int open;
- i915_check_and_clear_faults(dev_priv);
+ intel_gt_check_and_clear_faults(ggtt->vm.gt);
mutex_lock(&ggtt->vm.mutex);
/* First fill our portion of the GTT with scratch pages */
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
- ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
+
+ /* Skip rewriting PTE on VMA unbind. */
+ open = atomic_xchg(&ggtt->vm.open, 0);
/* clflush objects bound into the GGTT and rebind them. */
list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
struct drm_i915_gem_object *obj = vma->obj;
- if (!(vma->flags & I915_VMA_GLOBAL_BIND))
+ if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
continue;
- mutex_unlock(&ggtt->vm.mutex);
-
- if (!i915_vma_unbind(vma))
- goto lock;
+ if (!__i915_vma_unbind(vma))
+ continue;
+ clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
WARN_ON(i915_vma_bind(vma,
obj ? obj->cache_level : 0,
- PIN_UPDATE));
- if (obj)
- WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
-
-lock:
- mutex_lock(&ggtt->vm.mutex);
+ PIN_GLOBAL, NULL));
+ if (obj) { /* only used during resume => exclusive access */
+ flush |= fetch_and_zero(&obj->write_domain);
+ obj->read_domains |= I915_GEM_DOMAIN_GTT;
+ }
}
- ggtt->vm.closed = false;
- i915_ggtt_invalidate(dev_priv);
+ atomic_set(&ggtt->vm.open, open);
+ ggtt->invalidate(ggtt);
mutex_unlock(&ggtt->vm.mutex);
- if (INTEL_GEN(dev_priv) >= 8) {
- struct intel_ppat *ppat = &dev_priv->ppat;
+ if (flush)
+ wbinvd_on_all_cpus();
+}
- bitmap_set(ppat->dirty, 0, ppat->max_entries);
- dev_priv->ppat.update_hw(dev_priv);
- return;
- }
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
+{
+ struct i915_ggtt *ggtt = &i915->ggtt;
+
+ ggtt_restore_mappings(ggtt);
+
+ if (INTEL_GEN(i915) >= 8)
+ setup_private_pat(ggtt->vm.gt->uncore);
}
static struct scatterlist *
@@ -3617,6 +3426,89 @@ err_st_alloc:
return ERR_PTR(ret);
}
+static struct scatterlist *
+remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+ unsigned int width, unsigned int height,
+ unsigned int stride,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ unsigned int row;
+
+ for (row = 0; row < height; row++) {
+ unsigned int left = width * I915_GTT_PAGE_SIZE;
+
+ while (left) {
+ dma_addr_t addr;
+ unsigned int length;
+
+ /* We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only thing we need are DMA addresses.
+ */
+
+ addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
+
+ length = min(left, length);
+
+ st->nents++;
+
+ sg_set_page(sg, NULL, length, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = length;
+ sg = sg_next(sg);
+
+ offset += length / I915_GTT_PAGE_SIZE;
+ left -= length;
+ }
+
+ offset += stride - width;
+ }
+
+ return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned int size = intel_remapped_info_size(rem_info);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+ sg = remap_pages(obj, rem_info->plane[i].offset,
+ rem_info->plane[i].width, rem_info->plane[i].height,
+ rem_info->plane[i].stride, st, sg);
+ }
+
+ i915_sg_trim(st);
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+
+ DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
struct drm_i915_gem_object *obj)
@@ -3695,13 +3587,18 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
break;
+ case I915_GGTT_VIEW_REMAPPED:
+ vma->pages =
+ intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+ break;
+
case I915_GGTT_VIEW_PARTIAL:
vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
break;
}
ret = 0;
- if (unlikely(IS_ERR(vma->pages))) {
+ if (IS_ERR(vma->pages)) {
ret = PTR_ERR(vma->pages);
vma->pages = NULL;
DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
@@ -3746,7 +3643,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
GEM_BUG_ON(range_overflows(offset, size, vm->total));
- GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
+ GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
GEM_BUG_ON(drm_mm_node_allocated(node));
node->size = size;
@@ -3835,7 +3732,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
u64 offset;
int err;
- lockdep_assert_held(&vm->i915->drm.struct_mutex);
+ lockdep_assert_held(&vm->mutex);
+
GEM_BUG_ON(!size);
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(alignment && !is_power_of_2(alignment));
@@ -3843,7 +3741,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
GEM_BUG_ON(start >= end);
GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
- GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm);
+ GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
GEM_BUG_ON(drm_mm_node_allocated(node));
if (unlikely(range_overflows(start, size, end)))
@@ -3886,7 +3784,8 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
if (flags & PIN_NOEVICT)
return -ENOSPC;
- /* No free space, pick a slot at random.
+ /*
+ * No free space, pick a slot at random.
*
* There is a pathological case here using a GTT shared between
* mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
@@ -3914,6 +3813,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
if (err != -ENOSPC)
return err;
+ if (flags & PIN_NOSEARCH)
+ return -ENOSPC;
+
/* Randomly selected placement is pinned, do a search */
err = i915_gem_evict_something(vm, size, alignment, color,
start, end, flags);