diff options
| author | Linus Torvalds <[email protected]> | 2019-11-27 17:45:48 -0800 |
|---|---|---|
| committer | Linus Torvalds <[email protected]> | 2019-11-27 17:45:48 -0800 |
| commit | a6ed68d6468bd5a3da78a103344ded1435fed57a (patch) | |
| tree | be42a3609d7e9a2581806aab5bc1ace42f9ca992 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | |
| parent | 8c39f71ee2019e77ee14f88b1321b2348db51820 (diff) | |
| parent | acc61b8929365e63a3e8c8c8913177795aa45594 (diff) | |
Merge tag 'drm-next-2019-11-27' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Lots of stuff in here, though it hasn't been too insane this merge
apart from dealing with the security fun.
uapi:
- export different colorspace properties on DP vs HDMI
- new fourcc for ARM 16x16 block format
- syncobj: allow querying last submitted timeline value
- DRM_FORMAT_BIG_ENDIAN defined as unsigned
core:
- allow using gem vma manager in ttm
- connector/encoder/bridge doc fixes
- allow more than 3 encoders for a connector
- displayport mst suspend/resume reprobing support
- vram lazy unmapping, uniform vram mm and gem vram
- edid cleanups + AVI infoframe bar info
- displayport helpers - dpcd parser added
dp_cec:
- Allow a connector to be associated with a cec device
ttm:
- pipelining with no_gpu_wait fix
- always keep BOs on the LRU
sched:
- allow free_job routine to sleep
i915:
- Block userptr from mappable GTT
- i915 perf uapi versioning
- OA stream dynamic reconfiguration
- make context persistence optional
- introduce DRM_I915_UNSTABLE Kconfig
- add fake lmem testing under unstable
- BT.2020 support for DP MSA
- struct mutex elimination
- Tigerlake display/PLL/power management improvements
- Jasper Lake PCH support
- refactor PMU for multiple GPUs
- Icelake firmware update
- Split out vga + switcheroo code
amdgpu:
- implement dma-buf import/export without helpers
- vega20 RAS enablement
- DC i2c over aux fixes
- renoir GPU reset
- DC HDCP support
- BACO support for CI/VI asics
- MSI-X support
- Arcturus EEPROM support
- Arcturus VCN encode support
- VCN dynamic powergating on RV/RV2
amdkfd:
- add navi12/14/renoir support to kfd
radeon:
- SI dpm fix ported from amdgpu
- fix bad DMA on ppc platforms
gma500:
- memory leak fixes
qxl:
- convert to new gem mmap
exynos:
- build warning fix
komeda:
- add aclk sysfs attribute
v3d:
- userspace cleanup uapi change
i810:
- fix for underflow in dispatch ioctls
ast:
- refactor show_cursor
mgag200:
- refactor show_cursor
arcgpu:
- encoder finding improvements
mediatek:
- mipi_tx, dsi and partial crtc support for MT8183 SoC
- rotation support
meson:
- add suspend/resume support
omap:
- misc refactors
tegra:
- DisplayPort support for Tegra 210, 186 and 194.
- IOMMU-backed DMA API fixes
panfrost:
- fix lockdep issue
- simplify devfreq integration
rcar-du:
- R8A774B1 SoC support
- fixes for H2 ES2.0
sun4i:
- vcc-dsi regulator support
virtio-gpu:
- vmexit vs spinlock fix
- move to gem shmem helpers
- handle large command buffers with cma"
* tag 'drm-next-2019-11-27' of git://anongit.freedesktop.org/drm/drm: (1855 commits)
drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10
drm/amdgpu: initialize vm_inv_eng0_sem for gfxhub and mmhub
drm/amd/amdgpu/sriov skip RLCG s/r list for arcturus VF.
drm/amd/amdgpu/sriov temporarily skip ras,dtm,hdcp for arcturus VF
drm/amdgpu/gfx10: re-init clear state buffer after gpu reset
merge fix for "ftrace: Rework event_create_dir()"
drm/amdgpu: Update Arcturus golden registers
drm/amdgpu/gfx10: fix out-of-bound mqd_backup array access
drm/amdgpu/gfx10: explicitly wait for cp idle after halt/unhalt
Revert "drm/amd/display: enable S/G for RAVEN chip"
drm/amdgpu: disable gfxoff on original raven
drm/amdgpu: remove experimental flag for Navi14
drm/amdgpu: disable gfxoff when using register read interface
drm/amdgpu/powerplay: properly set PP_GFXOFF_MASK (v2)
drm/amdgpu: fix bad DMA from INTERRUPT_CNTL2
drm/radeon: fix bad DMA from INTERRUPT_CNTL2
drm/amd/display: Fix debugfs on MST connectors
drm/amdgpu/nv: add asic func for fetching vbios from rom directly
drm/amdgpu: put flush_delayed_work at first
drm/amdgpu/vcn2.5: fix the enc loop with hw fini
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 86 |
1 files changed, 45 insertions, 41 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d021ecc8d59..ae6f5446262c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -19,9 +19,6 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -#define pr_fmt(fmt) "kfd2kgd: " fmt - #include <linux/dma-buf.h> #include <linux/list.h> #include <linux/pagemap.h> @@ -33,11 +30,6 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" -/* Special VM and GART address alignment needed for VI pre-Fiji due to - * a HW bug. - */ -#define VI_BO_SIZE_ALIGN (0x8000) - /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) @@ -349,13 +341,46 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_update_directories(adev, vm); + ret = amdgpu_vm_update_pdes(adev, vm, false); if (ret) return ret; return amdgpu_sync_fence(NULL, sync, vm->last_update, false); } +static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) +{ + struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT; + uint32_t mapping_flags; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) { + if (bo_adev == adev) + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + else + mapping_flags |= AMDGPU_VM_MTYPE_UC; + } else { + mapping_flags |= coherent ? 
+ AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + break; + default: + mapping_flags |= coherent ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + } + + return amdgpu_gem_va_map_flags(adev, mapping_flags); +} + /* add_bo_to_vm - Add a BO to a VM * * Everything that needs to bo done only once when a BO is first added @@ -404,8 +429,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, } bo_va_entry->va = va; - bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, - mem->mapping_flags); + bo_va_entry->pte_flags = get_pte_flags(adev, mem); bo_va_entry->kgd_dev = (void *)adev; list_add(&bo_va_entry->bo_list, list_bo_va); @@ -586,7 +610,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, - false, &ctx->duplicates, true); + false, &ctx->duplicates); if (!ret) ctx->reserved = true; else { @@ -659,7 +683,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, } ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, - false, &ctx->duplicates, true); + false, &ctx->duplicates); if (!ret) ctx->reserved = true; else @@ -1079,10 +1103,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; - int byte_align; u32 domain, alloc_domain; u64 alloc_flags; - uint32_t mapping_flags; int ret; /* @@ -1135,25 +1157,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if ((*mem)->aql_queue) size = size >> 1; - /* Workaround for TLB bug on older VI chips */ - byte_align = (adev->family == AMDGPU_FAMILY_VI && - adev->asic_type != CHIP_FIJI && - adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11 && - adev->asic_type != CHIP_POLARIS12 && - adev->asic_type != CHIP_VEGAM) ? 
- VI_BO_SIZE_ALIGN : 1; - - mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (flags & ALLOC_MEM_FLAGS_WRITABLE) - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - if (flags & ALLOC_MEM_FLAGS_COHERENT) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - (*mem)->mapping_flags = mapping_flags; + (*mem)->alloc_flags = flags; amdgpu_sync_create(&(*mem)->sync); @@ -1168,7 +1172,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( memset(&bp, 0, sizeof(bp)); bp.size = size; - bp.byte_align = byte_align; + bp.byte_align = 1; bp.domain = alloc_domain; bp.flags = alloc_flags; bp.type = bo_type; @@ -1626,9 +1630,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); - (*mem)->mapping_flags = - AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | - AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + (*mem)->alloc_flags = + ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT) | + ALLOC_MEM_FLAGS_WRITABLE | ALLOC_MEM_FLAGS_EXECUTABLE; (*mem)->bo = amdgpu_bo_ref(bo); (*mem)->va = va; @@ -1797,8 +1802,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } /* Reserve all BOs and page tables for validation */ - ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates, - true); + ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) goto out_free; @@ -1996,7 +2000,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, - false, &duplicate_save, true); + false, &duplicate_save); if (ret) { pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); goto ttm_reserve_fail; |