 drivers/gpu/drm/xe/xe_pt.c       |  11
 drivers/gpu/drm/xe/xe_vm.c       |  67
 drivers/gpu/drm/xe/xe_vm_types.h |   7
 include/uapi/drm/xe_drm.h        |  48
 4 files changed, 115 insertions(+), 18 deletions(-)
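
At a high level, the patch threads a user-supplied pat_index from the vm_bind
uapi through the map operation and the VMA down to the PTE encoding, replacing
the old per-walk cache-level selection. The following is a simplified,
self-contained sketch of that flow, not the kernel code itself: the toy_* names
and the table size are invented for illustration, and the real driver
additionally validates the coherency mode against the BO's cpu_caching, as the
hunks below show.

/*
 * Simplified sketch of the new flow (toy types, not the kernel's):
 * validate the index at ioctl time, carry it on the map op and the VMA,
 * and read it back from the VMA when encoding PTEs.
 */
#include <errno.h>
#include <stdint.h>

#define TOY_N_PAT_ENTRIES 32            /* placeholder; real table is per platform */

struct toy_bind_op { uint16_t pat_index; };     /* uapi bind op */
struct toy_map_op  { uint16_t pat_index; };     /* internal map operation */
struct toy_vma     { uint16_t pat_index; };     /* resulting VMA */

/* vm_bind_ioctl_check_args(): reject out-of-range indices up front. */
static int toy_check_args(const struct toy_bind_op *op)
{
        return op->pat_index < TOY_N_PAT_ENTRIES ? 0 : -EINVAL;
}

/* vm_bind_ioctl_ops_create() + new_vma(): the index rides along unchanged. */
static void toy_create_vma(struct toy_vma *vma, const struct toy_bind_op *bind)
{
        struct toy_map_op map = { .pat_index = bind->pat_index };

        vma->pat_index = map.pat_index;
}

/* xe_pt_stage_bind_entry(): the PTE pat_index now comes from the VMA
 * instead of being derived from a fixed xe_cache_level. */
static uint16_t toy_pte_pat_index(const struct toy_vma *vma)
{
        return vma->pat_index;
}
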
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c6c9b723db5a..3b485313804a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -290,8 +290,6 @@ struct xe_pt_stage_bind_walk {
         struct xe_vm *vm;
         /** @tile: The tile we're building for. */
         struct xe_tile *tile;
-        /** @cache: Desired cache level for the ptes */
-        enum xe_cache_level cache;
         /** @default_pte: PTE flag only template. No address is associated */
         u64 default_pte;
         /** @dma_offset: DMA offset to add to the PTE. */
@@ -511,7 +509,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 {
         struct xe_pt_stage_bind_walk *xe_walk =
                 container_of(walk, typeof(*xe_walk), base);
-        u16 pat_index = tile_to_xe(xe_walk->tile)->pat.idx[xe_walk->cache];
+        u16 pat_index = xe_walk->vma->pat_index;
         struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
         struct xe_vm *vm = xe_walk->vm;
         struct xe_pt *xe_child;
@@ -657,13 +655,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
         if (is_devmem) {
                 xe_walk.default_pte |= XE_PPGTT_PTE_DM;
                 xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
-                xe_walk.cache = XE_CACHE_WB;
-        } else {
-                if (!xe_vma_has_no_bo(vma) && bo->flags & XE_BO_SCANOUT_BIT)
-                        xe_walk.cache = XE_CACHE_WT;
-                else
-                        xe_walk.cache = XE_CACHE_WB;
         }
+
         if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo))
                 xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo));
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index c33ae4db4e02..a97a310123fc 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -6,6 +6,7 @@
 #include "xe_vm.h"
 
 #include <linux/dma-fence-array.h>
+#include <linux/nospec.h>
 
 #include <drm/drm_exec.h>
 #include <drm/drm_print.h>
@@ -26,6 +27,7 @@
 #include "xe_gt_pagefault.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
+#include "xe_pat.h"
 #include "xe_pm.h"
 #include "xe_preempt_fence.h"
 #include "xe_pt.h"
@@ -868,7 +870,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
                                     u64 start, u64 end,
                                     bool read_only,
                                     bool is_null,
-                                    u8 tile_mask)
+                                    u8 tile_mask,
+                                    u16 pat_index)
 {
         struct xe_vma *vma;
         struct xe_tile *tile;
@@ -910,6 +913,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
         if (GRAPHICS_VER(vm->xe) >= 20 || vm->xe->info.platform == XE_PVC)
                 vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
 
+        vma->pat_index = pat_index;
+
         if (bo) {
                 struct drm_gpuvm_bo *vm_bo;
 
@@ -2162,7 +2167,7 @@ static struct drm_gpuva_ops *
 vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
                          u64 bo_offset_or_userptr, u64 addr, u64 range,
                          u32 operation, u32 flags, u8 tile_mask,
-                         u32 prefetch_region)
+                         u32 prefetch_region, u16 pat_index)
 {
         struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
         struct drm_gpuva_ops *ops;
@@ -2231,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
                         op->map.read_only =
                                 flags & DRM_XE_VM_BIND_FLAG_READONLY;
                         op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+                        op->map.pat_index = pat_index;
                 } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
                         op->prefetch.region = prefetch_region;
                 }
@@ -2242,7 +2248,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 }
 
 static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
-                              u8 tile_mask, bool read_only, bool is_null)
+                              u8 tile_mask, bool read_only, bool is_null,
+                              u16 pat_index)
 {
         struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
         struct xe_vma *vma;
@@ -2258,7 +2265,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
         vma = xe_vma_create(vm, bo, op->gem.offset,
                             op->va.addr, op->va.addr +
                             op->va.range - 1, read_only, is_null,
-                            tile_mask);
+                            tile_mask, pat_index);
         if (bo)
                 xe_bo_unlock(bo);
 
@@ -2404,7 +2411,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
                         vma = new_vma(vm, &op->base.map,
                                       op->tile_mask, op->map.read_only,
-                                      op->map.is_null);
+                                      op->map.is_null, op->map.pat_index);
                         if (IS_ERR(vma))
                                 return PTR_ERR(vma);
 
@@ -2430,7 +2437,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
                                 vma = new_vma(vm, op->base.remap.prev,
                                               op->tile_mask, read_only,
-                                              is_null);
+                                              is_null, old->pat_index);
                                 if (IS_ERR(vma))
                                         return PTR_ERR(vma);
 
@@ -2464,7 +2471,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 
                                 vma = new_vma(vm, op->base.remap.next,
                                               op->tile_mask, read_only,
-                                              is_null);
+                                              is_null, old->pat_index);
                                 if (IS_ERR(vma))
                                         return PTR_ERR(vma);
 
@@ -2862,6 +2869,26 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
                 u64 obj_offset = (*bind_ops)[i].obj_offset;
                 u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance;
                 bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+                u16 pat_index = (*bind_ops)[i].pat_index;
+                u16 coh_mode;
+
+                if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) {
+                        err = -EINVAL;
+                        goto free_bind_ops;
+                }
+
+                pat_index = array_index_nospec(pat_index, xe->pat.n_entries);
+                (*bind_ops)[i].pat_index = pat_index;
+                coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+                if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */
+                        err = -EINVAL;
+                        goto free_bind_ops;
+                }
+
+                if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
+                        err = -EINVAL;
+                        goto free_bind_ops;
+                }
 
                 if (i == 0) {
                         *async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
@@ -2892,6 +2919,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
                                  op == DRM_XE_VM_BIND_OP_UNMAP_ALL) ||
                     XE_IOCTL_DBG(xe, obj &&
                                  op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+                    XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+                                 op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
                     XE_IOCTL_DBG(xe, obj &&
                                  op == DRM_XE_VM_BIND_OP_PREFETCH) ||
                     XE_IOCTL_DBG(xe, prefetch_region &&
@@ -3025,6 +3054,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                 u64 addr = bind_ops[i].addr;
                 u32 obj = bind_ops[i].obj;
                 u64 obj_offset = bind_ops[i].obj_offset;
+                u16 pat_index = bind_ops[i].pat_index;
+                u16 coh_mode;
 
                 if (!obj)
                         continue;
@@ -3052,6 +3083,24 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                                 goto put_obj;
                         }
                 }
+
+                coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
+                if (bos[i]->cpu_caching) {
+                        if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
+                                         bos[i]->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) {
+                                err = -EINVAL;
+                                goto put_obj;
+                        }
+                } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) {
+                        /*
+                         * Imported dma-buf from a different device should
+                         * require 1way or 2way coherency since we don't know
+                         * how it was mapped on the CPU. Just assume it is
+                         * potentially cached on the CPU side.
+                         */
+                        err = -EINVAL;
+                        goto put_obj;
+                }
         }
 
         if (args->num_syncs) {
@@ -3079,10 +3128,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
                 u64 obj_offset = bind_ops[i].obj_offset;
                 u8 tile_mask = bind_ops[i].tile_mask;
                 u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
+                u16 pat_index = bind_ops[i].pat_index;
 
                 ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
                                                   addr, range, op, flags,
-                                                  tile_mask, prefetch_region);
+                                                  tile_mask, prefetch_region,
+                                                  pat_index);
                 if (IS_ERR(ops[i])) {
                         err = PTR_ERR(ops[i]);
                         ops[i] = NULL;
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index fc2645e07578..74cdf16a42ad 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -110,6 +110,11 @@ struct xe_vma {
          */
         u8 tile_present;
 
+        /**
+         * @pat_index: The pat index to use when encoding the PTEs for this vma.
+         */
+        u16 pat_index;
+
         struct {
                 struct list_head rebind_link;
         } notifier;
@@ -333,6 +338,8 @@ struct xe_vma_op_map {
         bool read_only;
         /** @is_null: is NULL binding */
         bool is_null;
+        /** @pat_index: The pat index to use for this operation. */
+        u16 pat_index;
 };
 
 /** struct xe_vma_op_remap - VMA remap operation */
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index ab7d1b26c773..1a844fa7af8a 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -636,8 +636,54 @@ struct drm_xe_vm_bind_op {
          */
         __u32 obj;
 
+        /**
+         * @pat_index: The platform defined @pat_index to use for this mapping.
+         * The index basically maps to some predefined memory attributes,
+         * including things like caching, coherency, compression etc. The exact
+         * meaning of the pat_index is platform specific and defined in the
+         * Bspec and PRMs. When the KMD sets up the binding the index here is
+         * encoded into the ppGTT PTE.
+         *
+         * For coherency the @pat_index needs to be at least 1way coherent when
+         * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
+         * will extract the coherency mode from the @pat_index and reject if
+         * there is a mismatch (see note below for pre-MTL platforms).
+         *
+         * Note: On pre-MTL platforms there is only a caching mode and no
+         * explicit coherency mode, but on such hardware there is always a
+         * shared-LLC (or is dgpu) so all GT memory accesses are coherent with
+         * CPU caches even with the caching mode set as uncached. It's only the
+         * display engine that is incoherent (on dgpu it must be in VRAM which
+         * is always mapped as WC on the CPU). However to keep the uapi somewhat
+         * consistent with newer platforms the KMD groups the different cache
+         * levels into the following coherency buckets on all pre-MTL platforms:
+         *
+         *      ppGTT UC -> COH_NONE
+         *      ppGTT WC -> COH_NONE
+         *      ppGTT WT -> COH_NONE
+         *      ppGTT WB -> COH_AT_LEAST_1WAY
+         *
+         * In practice UC/WC/WT should only ever be used for scanout surfaces
+         * on such platforms (or perhaps in general for dma-buf if shared with
+         * another device) since it is only the display engine that is actually
+         * incoherent. Everything else should typically use WB given that we
+         * have a shared-LLC. On MTL+ this completely changes and the HW
+         * defines the coherency mode as part of the @pat_index, where
+         * incoherent GT access is possible.
+         *
+         * Note: For userptr and externally imported dma-buf the kernel expects
+         * either 1WAY or 2WAY for the @pat_index.
+         *
+         * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions
+         * on the @pat_index. For such mappings there is no actual memory being
+         * mapped (the address in the PTE is invalid), so the various PAT memory
+         * attributes likely do not apply. Simply leaving as zero is one option
+         * (still a valid pat_index).
+         */
+        __u16 pat_index;
+
         /** @pad: MBZ */
-        __u32 pad;
+        __u16 pad;
 
         union {
                 /**
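
To make the uapi addition concrete, here is a hedged userspace-side sketch of
filling in a bind op for a BO created with DRM_XE_GEM_CPU_CACHING_WB. The
helper name and the passed-in index value are placeholders: valid pat_index
values and their coherency modes are platform specific (per the Bspec/PRM),
and the surrounding struct drm_xe_vm_bind / ioctl plumbing is omitted.

/*
 * Hypothetical userspace example (not part of the patch): populate a bind
 * op with an explicit pat_index. The index passed in must be a platform
 * specific entry whose coherency mode is at least 1-way coherent when the
 * BO uses DRM_XE_GEM_CPU_CACHING_WB, otherwise the bind ioctl now rejects
 * it with -EINVAL.
 */
#include <string.h>
#include <drm/xe_drm.h>

static struct drm_xe_vm_bind_op map_bo_with_pat(__u32 bo_handle, __u64 gpu_addr,
                                                __u64 size, __u16 pat_index)
{
        struct drm_xe_vm_bind_op op;

        memset(&op, 0, sizeof(op));     /* @pad and unused fields must be zero */
        op.obj = bo_handle;             /* GEM handle of the BO to map */
        op.obj_offset = 0;              /* offset into the BO */
        op.addr = gpu_addr;             /* GPU virtual address of the mapping */
        op.range = size;                /* size of the mapping */
        op.op = DRM_XE_VM_BIND_OP_MAP;
        op.pat_index = pat_index;       /* platform defined PAT table index */

        return op;
}

For a DRM_XE_VM_BIND_FLAG_NULL binding the pat_index can simply stay zero,
since no memory is actually mapped behind the PTEs.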