Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
87 files changed, 3153 insertions, 2076 deletions
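Most of this series wires KFD GPUVM support into amdgpu: a per-process eviction fence (amdgpu_amdkfd_fence.c), GPUVM buffer management (amdgpu_amdkfd_gpuvm.c), and TLB-invalidation/page-table-base hooks in the gfx_v7/gfx_v8 interface files. Those gfx hooks key their checks off a compute VMID bitmap of 0xFF00, i.e. VMIDs 8-15 are reserved for KFD. Below is a minimal standalone sketch of that bitmap test only (it mirrors amdgpu_amdkfd_is_kfd_vmid() from the diff minus the adev->kfd check; it is not driver code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same value as compute_vmid_bitmap in amdgpu_amdkfd.c: bits 8-15 set,
 * so VMIDs 8-15 belong to KFD compute queues.
 */
static const unsigned int compute_vmid_bitmap = 0xFF00;

static bool is_kfd_vmid(uint32_t vmid)
{
	return (1u << vmid) & compute_vmid_bitmap;
}

int main(void)
{
	/* VMIDs 0-7 stay with amdgpu graphics, 8-15 go to KFD. */
	for (uint32_t vmid = 0; vmid < 16; vmid++)
		printf("vmid %2u -> %s\n", vmid,
		       is_kfd_vmid(vmid) ? "KFD" : "amdgpu");
	return 0;
}

This is also the split that invalidate_tlbs() and set_vm_context_page_table_base() in the gfx_v7/gfx_v8 hunks rely on when they loop over VMIDs 8-15 or index mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8.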
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index be5e5acc3e39..2ca2b5154d52 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -30,7 +30,6 @@ FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ -I$(FULL_AMD_PATH)/amdgpu \ - -I$(FULL_AMD_PATH)/scheduler \ -I$(FULL_AMD_PATH)/powerplay/inc \ -I$(FULL_AMD_PATH)/acp/include \ -I$(FULL_AMD_DISPLAY_PATH) \ @@ -88,8 +87,7 @@ amdgpu-y += \ # add SMC block amdgpu-y += \ - amdgpu_dpm.o \ - amdgpu_powerplay.o + amdgpu_dpm.o # add DCE block amdgpu-y += \ @@ -130,6 +128,8 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ + amdgpu_amdkfd_fence.o \ + amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o # add cgs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1ac81be374dd..f44a83ab2bf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -181,10 +181,6 @@ extern int amdgpu_cik_support; #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 -/* GPU RESET flags */ -#define AMDGPU_RESET_INFO_VRAM_LOST (1 << 0) -#define AMDGPU_RESET_INFO_FULLRESET (1 << 1) - struct amdgpu_device; struct amdgpu_ib; struct amdgpu_cs_parser; @@ -344,14 +340,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev); bool amdgpu_read_bios(struct amdgpu_device *adev); /* - * Dummy page - */ -struct amdgpu_dummy_page { - struct page *page; - dma_addr_t addr; -}; - -/* * Clocks */ @@ -453,7 +441,7 @@ struct amdgpu_sa_bo { void amdgpu_gem_force_release(struct amdgpu_device *adev); int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, - u64 flags, bool kernel, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, struct drm_gem_object **obj); @@ -1080,7 +1068,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. 
*/ struct amdgpu_wb { struct amdgpu_bo *wb_obj; @@ -1093,8 +1081,6 @@ struct amdgpu_wb { int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb); -void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); - /* * SDMA */ @@ -1407,9 +1393,7 @@ enum amd_hw_ip_block_type { #define HWIP_MAX_INSTANCE 6 struct amd_powerplay { - struct cgs_device *cgs_device; void *pp_handle; - const struct amd_ip_funcs *ip_funcs; const struct amd_pm_funcs *pp_funcs; }; @@ -1505,7 +1489,7 @@ struct amdgpu_device { /* MC */ struct amdgpu_gmc gmc; struct amdgpu_gart gart; - struct amdgpu_dummy_page dummy_page; + dma_addr_t dummy_page_addr; struct amdgpu_vm_manager vm_manager; struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; @@ -1644,6 +1628,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); + u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); @@ -1667,6 +1654,9 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) +#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) + #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) @@ -1839,9 +1829,6 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev, void amdgpu_device_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); -void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size); -int amdgpu_ttm_init(struct amdgpu_device *adev); -void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 57afad79f55d..8fa850a070e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, size_t size; u32 retry = 3; + if (amdgpu_acpi_pcie_notify_device_ready(adev)) + return -EINVAL; + /* Get the device handle */ handle = ACPI_HANDLE(&adev->pdev->dev); if (!handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 450426dbed92..4d36203ffb11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -30,6 +30,8 @@ const struct kgd2kfd_calls *kgd2kfd; bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); +static const unsigned int compute_vmid_bitmap = 0xFF00; + int amdgpu_amdkfd_init(void) { int ret; @@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void) #else ret = -ENOENT; #endif + amdgpu_amdkfd_gpuvm_init_mem_limits(); return ret; } @@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) switch (adev->asic_type) { #ifdef 
CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: + case CHIP_HAWAII: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); break; #endif case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: @@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = 0xFF00, + .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, - .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, + .gpuvm_size = min(adev->vm_manager.max_pfn + << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_VA_HOLE_START), + .drm_render_minor = adev->ddev->render->index }; /* this is going to have a few of the MSBs set that we need to @@ -204,19 +216,14 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **cpu_ptr) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - struct kgd_mem **mem = (struct kgd_mem **) mem_obj; + struct amdgpu_bo *bo = NULL; int r; + uint64_t gpu_addr_tmp = 0; + void *cpu_ptr_tmp = NULL; - BUG_ON(kgd == NULL); - BUG_ON(gpu_addr == NULL); - BUG_ON(cpu_ptr == NULL); - - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if ((*mem) == NULL) - return -ENOMEM; - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel, + NULL, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -224,54 +231,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, } /* map the buffer */ - r = amdgpu_bo_reserve((*mem)->bo, true); + r = amdgpu_bo_reserve(bo, true); if (r) { dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } - r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, - &(*mem)->gpu_addr); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, + &gpu_addr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } - *gpu_addr = (*mem)->gpu_addr; - r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } - *cpu_ptr = (*mem)->cpu_ptr; - amdgpu_bo_unreserve((*mem)->bo); + *mem_obj = bo; + *gpu_addr = gpu_addr_tmp; + *cpu_ptr = cpu_ptr_tmp; + + amdgpu_bo_unreserve(bo); return 0; allocate_mem_kmap_bo_failed: - amdgpu_bo_unpin((*mem)->bo); + amdgpu_bo_unpin(bo); allocate_mem_pin_bo_failed: - amdgpu_bo_unreserve((*mem)->bo); + amdgpu_bo_unreserve(bo); allocate_mem_reserve_bo_failed: - amdgpu_bo_unref(&(*mem)->bo); + amdgpu_bo_unref(&bo); return r; } void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { - struct kgd_mem *mem = (struct kgd_mem *) mem_obj; + struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; - BUG_ON(mem == NULL); - - amdgpu_bo_reserve(mem->bo, true); - amdgpu_bo_kunmap(mem->bo); - amdgpu_bo_unpin(mem->bo); - amdgpu_bo_unreserve(mem->bo); - amdgpu_bo_unref(&(mem->bo)); - kfree(mem); + amdgpu_bo_reserve(bo, true); + amdgpu_bo_kunmap(bo); + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&(bo)); } void get_local_mem_info(struct kgd_dev *kgd, @@ -361,3 +367,68 @@ uint64_t 
amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } + +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct amdgpu_ring *ring; + struct dma_fence *f = NULL; + int ret; + + switch (engine) { + case KGD_ENGINE_MEC1: + ring = &adev->gfx.compute_ring[0]; + break; + case KGD_ENGINE_SDMA1: + ring = &adev->sdma.instance[0].ring; + break; + case KGD_ENGINE_SDMA2: + ring = &adev->sdma.instance[1].ring; + break; + default: + pr_err("Invalid engine in IB submission: %d\n", engine); + ret = -EINVAL; + goto err; + } + + ret = amdgpu_job_alloc(adev, 1, &job, NULL); + if (ret) + goto err; + + ib = &job->ibs[0]; + memset(ib, 0, sizeof(struct amdgpu_ib)); + + ib->gpu_addr = gpu_addr; + ib->ptr = ib_cmd; + ib->length_dw = ib_len; + /* This works for NO_HWS. TODO: need to handle without knowing VMID */ + job->vmid = vmid; + + ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); + if (ret) { + DRM_ERROR("amdgpu: failed to schedule IB.\n"); + goto err_ib_sched; + } + + ret = dma_fence_wait(f, false); + +err_ib_sched: + dma_fence_put(f); + amdgpu_job_free(job); +err: + return ret; +} + +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) +{ + if (adev->kfd) { + if ((1 << vmid) & compute_vmid_bitmap) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2a519f9062ee..d7509b706b26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,13 +28,89 @@ #include <linux/types.h> #include <linux/mmu_context.h> #include <kgd_kfd_interface.h> +#include <drm/ttm/ttm_execbuf_util.h> +#include "amdgpu_sync.h" +#include "amdgpu_vm.h" + +extern const struct kgd2kfd_calls *kgd2kfd; struct amdgpu_device; +struct kfd_bo_va_list { + struct list_head bo_list; + struct amdgpu_bo_va *bo_va; + void *kgd_dev; + bool is_mapped; + uint64_t va; + uint64_t pte_flags; +}; + struct kgd_mem { + struct mutex lock; struct amdgpu_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; + struct list_head bo_va_list; + /* protected by amdkfd_process_info.lock */ + struct ttm_validate_buffer validate_list; + struct ttm_validate_buffer resv_list; + uint32_t domain; + unsigned int mapped_to_gpu_memory; + uint64_t va; + + uint32_t mapping_flags; + + struct amdkfd_process_info *process_info; + + struct amdgpu_sync sync; + + bool aql_queue; +}; + +/* KFD Memory Eviction */ +struct amdgpu_amdkfd_fence { + struct dma_fence base; + struct mm_struct *mm; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; +}; + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); + +struct amdkfd_process_info { + /* List head of all VMs that belong to a KFD process */ + struct list_head vm_list_head; + /* List head for all KFD BOs that belong to a KFD process. 
*/ + struct list_head kfd_bo_list; + /* Lock to protect kfd_bo_list */ + struct mutex lock; + + /* Number of VMs */ + unsigned int n_vms; + /* Eviction Fence */ + struct amdgpu_amdkfd_fence *eviction_fence; +}; + +/* struct amdkfd_vm - + * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs + * belonging to a KFD process. All the VMs belonging to the same process point + * to the same amdkfd_process_info. + */ +struct amdkfd_vm { + /* Keep base as the first parameter for pointer compatibility between + * amdkfd_vm and amdgpu_vm. + */ + struct amdgpu_vm base; + + /* List node in amdkfd_process_info.vm_list_head*/ + struct list_head vm_list_node; + + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; + + uint64_t pd_phys_addr; }; int amdgpu_amdkfd_init(void); @@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); + /* Shared API */ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); valid; \ }) +/* GPUVM API */ +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef); +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags); +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size); +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); + +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c new file mode 100644 index 000000000000..2c14025e5e76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -0,0 +1,179 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/dma-fence.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/stacktrace.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sched/mm.h> +#include "amdgpu_amdkfd.h" + +static const struct dma_fence_ops amdkfd_fence_ops; +static atomic_t fence_seq = ATOMIC_INIT(0); + +/* Eviction Fence + * Fence helper functions to deal with KFD memory eviction. + * Big Idea - Since KFD submissions are done by user queues, a BO cannot be + * evicted unless all the user queues for that process are evicted. + * + * All the BOs in a process share an eviction fence. When process X wants + * to map VRAM memory but TTM can't find enough space, TTM will attempt to + * evict BOs from its LRU list. TTM checks if the BO is valuable to evict + * by calling ttm_bo_driver->eviction_valuable(). + * + * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs + * to process X. Otherwise, it will return true to indicate BO can be + * evicted by TTM. + * + * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue + * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move + * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler. + * + * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to + * nofity when the BO is free to move. fence_add_callback --> enable_signaling + * --> amdgpu_amdkfd_fence.enable_signaling + * + * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce + * user queues and signal fence. 
The work item will also start another delayed + * work item to restore BOs + */ + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + + /* This reference gets released in amdkfd_fence_release */ + mmgrab(mm); + fence->mm = mm; + get_task_comm(fence->timeline_name, current); + spin_lock_init(&fence->lock); + + dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, + context, atomic_inc_return(&fence_seq)); + + return fence; +} + +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence; + + if (!f) + return NULL; + + fence = container_of(f, struct amdgpu_amdkfd_fence, base); + if (fence && f->ops == &amdkfd_fence_ops) + return fence; + + return NULL; +} + +static const char *amdkfd_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_amdkfd_fence"; +} + +static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + return fence->timeline_name; +} + +/** + * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict + * a KFD BO and schedules a job to move the BO. + * If fence is already signaled return true. + * If fence is not signaled schedule a evict KFD process work item. + */ +static bool amdkfd_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + + if (dma_fence_is_signaled(f)) + return true; + + if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + return true; + + return false; +} + +/** + * amdkfd_fence_release - callback that fence can be freed + * + * @fence: fence + * + * This function is called when the reference count becomes zero. + * Drops the mm_struct reference and RCU schedules freeing up the fence. + */ +static void amdkfd_fence_release(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + /* Unconditionally signal the fence. The process is getting + * terminated. + */ + if (WARN_ON(!fence)) + return; /* Not an amdgpu_amdkfd_fence */ + + mmdrop(fence->mm); + kfree_rcu(f, rcu); +} + +/** + * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f + * if same return TRUE else return FALSE. 
+ * + * @f: [IN] fence + * @mm: [IN] mm that needs to be verified + */ +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + else if (fence->mm == mm) + return true; + + return false; +} + +static const struct dma_fence_ops amdkfd_fence_ops = { + .get_driver_name = amdkfd_fence_get_driver_name, + .get_timeline_name = amdkfd_fence_get_timeline_name, + .enable_signaling = amdkfd_fence_enable_signaling, + .signaled = NULL, + .wait = dma_fence_default_wait, + .release = amdkfd_fence_release, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index a9e6aea0e5f8..7485c376b90e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. @@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device 
*adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) @@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid\n"); + return 0; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index b127259d7d85..7be453494423 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t queue_id); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int utimeout); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static int kgd_address_watch_disable(struct kgd_dev *kgd); static int kgd_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, @@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
@@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = { get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static int kgd_address_watch_disable(struct kgd_dev *kgd) @@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) @@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) /* Only 12 bit in use*/ return hdr->common.ucode_version; } + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 
-EINVAL; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c new file mode 100644 index 000000000000..a12a1654e124 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -0,0 +1,1506 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/list.h> +#include <drm/drmP.h> +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_amdkfd.h" + +/* Special VM and GART address alignment needed for VI pre-Fiji due to + * a HW bug. + */ +#define VI_BO_SIZE_ALIGN (0x8000) + +/* Impose limit on how much memory KFD can use */ +static struct { + uint64_t max_system_mem_limit; + int64_t system_mem_used; + spinlock_t mem_limit_lock; +} kfd_mem_limit; + +/* Struct used for amdgpu_amdkfd_bo_validate */ +struct amdgpu_vm_parser { + uint32_t domain; + bool wait; +}; + +static const char * const domain_bit_to_string[] = { + "CPU", + "GTT", + "VRAM", + "GDS", + "GWS", + "OA" +}; + +#define domain_string(domain) domain_bit_to_string[ffs(domain)-1] + + + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, + struct kgd_mem *mem) +{ + struct kfd_bo_va_list *entry; + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) + if (entry->bo_va->base.vm == avm) + return false; + + return true; +} + +/* Set memory usage limits. 
Current, limits are + * System (kernel) memory - 3/8th System RAM + */ +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ + struct sysinfo si; + uint64_t mem; + + si_meminfo(&si); + mem = si.totalram - si.totalhigh; + mem *= si.mem_unit; + + spin_lock_init(&kfd_mem_limit.mem_limit_lock); + kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20)); +} + +static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + int ret = 0; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) { + if (kfd_mem_limit.system_mem_used + (acc_size + size) > + kfd_mem_limit.max_system_mem_limit) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += (acc_size + size); + } +err_no_mem: + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return ret; +} + +static void unreserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) + kfd_mem_limit.system_mem_used -= (acc_size + size); + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +{ + spin_lock(&kfd_mem_limit.mem_limit_lock); + + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + kfd_mem_limit.system_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + } + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + + +/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's + * reservation object. + * + * @bo: [IN] Remove eviction fence(s) from this BO + * @ef: [IN] If ef is specified, then this eviction fence is removed if it + * is present in the shared list. + * @ef_list: [OUT] Returns list of eviction fences. These fences are removed + * from BO's reservation object shared list. + * @ef_count: [OUT] Number of fences in ef_list. + * + * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be + * called to restore the eviction fences and to avoid memory leak. This is + * useful for shared BOs. + * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held. + */ +static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence *ef, + struct amdgpu_amdkfd_fence ***ef_list, + unsigned int *ef_count) +{ + struct reservation_object_list *fobj; + struct reservation_object *resv; + unsigned int i = 0, j = 0, k = 0, shared_count; + unsigned int count = 0; + struct amdgpu_amdkfd_fence **fence_list; + + if (!ef && !ef_list) + return -EINVAL; + + if (ef_list) { + *ef_list = NULL; + *ef_count = 0; + } + + resv = bo->tbo.resv; + fobj = reservation_object_get_list(resv); + + if (!fobj) + return 0; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + /* Go through all the shared fences in the resevation object. If + * ef is specified and it exists in the list, remove it and reduce the + * count. If ef is not specified, then get the count of eviction fences + * present. 
+ */ + shared_count = fobj->shared_count; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + if (ef) { + if (f->context == ef->base.context) { + dma_fence_put(f); + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } else if (to_amdgpu_amdkfd_fence(f)) + count++; + } + write_seqcount_end(&resv->seq); + preempt_enable(); + + if (ef || !count) + return 0; + + /* Alloc memory for count number of eviction fence pointers. Fill the + * ef_list array and ef_count + */ + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), + GFP_KERNEL); + if (!fence_list) + return -ENOMEM; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + j = 0; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + struct amdgpu_amdkfd_fence *efence; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + efence = to_amdgpu_amdkfd_fence(f); + if (efence) { + fence_list[k++] = efence; + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } + + write_seqcount_end(&resv->seq); + preempt_enable(); + + *ef_list = fence_list; + *ef_count = k; + + return 0; +} + +/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's + * reservation object. + * + * @bo: [IN] Add eviction fences to this BO + * @ef_list: [IN] List of eviction fences to be added + * @ef_count: [IN] Number of fences in ef_list. + * + * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this + * function. + */ +static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence **ef_list, + unsigned int ef_count) +{ + int i; + + if (!ef_list || !ef_count) + return; + + for (i = 0; i < ef_count; i++) { + amdgpu_bo_fence(bo, &ef_list[i]->base, true); + /* Re-adding the fence takes an additional reference. Drop that + * reference. + */ + dma_fence_put(&ef_list[i]->base); + } + + kfree(ef_list); +} + +static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, + bool wait) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), + "Called with userptr BO")) + return -EINVAL; + + amdgpu_ttm_placement_from_domain(bo, domain); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto validate_fail; + if (wait) { + struct amdgpu_amdkfd_fence **ef_list; + unsigned int ef_count; + + ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, + &ef_count); + if (ret) + goto validate_fail; + + ttm_bo_wait(&bo->tbo, false, false); + amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); + } + +validate_fail: + return ret; +} + +static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) +{ + struct amdgpu_vm_parser *p = param; + + return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); +} + +/* vm_validate_pt_pd_bos - Validate page table and directory BOs + * + * Page directories are not updated here because huge page handling + * during page table updates can invalidate page directory entries + * again. Page directories are only updated after updating page + * tables. 
+ */ +static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) +{ + struct amdgpu_bo *pd = vm->base.root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + struct amdgpu_vm_parser param; + uint64_t addr, flags = AMDGPU_PTE_VALID; + int ret; + + param.domain = AMDGPU_GEM_DOMAIN_VRAM; + param.wait = false; + + ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, + ¶m); + if (ret) { + pr_err("amdgpu: failed to validate PT BOs\n"); + return ret; + } + + ret = amdgpu_amdkfd_validate(¶m, pd); + if (ret) { + pr_err("amdgpu: failed to validate PD\n"); + return ret; + } + + addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); + amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); + vm->pd_phys_addr = addr; + + if (vm->base.use_cpu_for_update) { + ret = amdgpu_bo_kmap(pd, NULL); + if (ret) { + pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); + return ret; + } + } + + return 0; +} + +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. + */ + if (sync->last_vm_update) { + dma_fence_put(sync->last_vm_update); + sync->last_vm_update = NULL; + } + + return ret; +} + +static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) +{ + struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + int ret; + + ret = amdgpu_vm_update_directories(adev, vm); + if (ret) + return ret; + + return sync_vm_fence(adev, sync, vm->last_update); +} + +/* add_bo_to_vm - Add a BO to a VM + * + * Everything that needs to bo done only once when a BO is first added + * to a VM. It can later be mapped and unmapped many times without + * repeating these steps. + * + * 1. Allocate and initialize BO VA entry data structure + * 2. Add BO to the VM + * 3. Determine ASIC-specific PTE flags + * 4. Alloc page tables and directories if needed + * 4a. Validate new page tables and directories + */ +static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_vm *avm, bool is_aql, + struct kfd_bo_va_list **p_bo_va_entry) +{ + int ret; + struct kfd_bo_va_list *bo_va_entry; + struct amdkfd_vm *kvm = container_of(avm, + struct amdkfd_vm, base); + struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_bo *bo = mem->bo; + uint64_t va = mem->va; + struct list_head *list_bo_va = &mem->bo_va_list; + unsigned long bo_size = bo->tbo.mem.size; + + if (!va) { + pr_err("Invalid VA when adding BO to VM\n"); + return -EINVAL; + } + + if (is_aql) + va += bo_size; + + bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); + if (!bo_va_entry) + return -ENOMEM; + + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, + va + bo_size, avm); + + /* Add BO to VM internal data structures*/ + bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); + if (!bo_va_entry->bo_va) { + ret = -EINVAL; + pr_err("Failed to add BO object to VM. ret == %d\n", + ret); + goto err_vmadd; + } + + bo_va_entry->va = va; + bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, + mem->mapping_flags); + bo_va_entry->kgd_dev = (void *)adev; + list_add(&bo_va_entry->bo_list, list_bo_va); + + if (p_bo_va_entry) + *p_bo_va_entry = bo_va_entry; + + /* Allocate new page tables if needed and validate + * them. Clearing of new page tables and validate need to wait + * on move fences. 
We don't want that to trigger the eviction + * fence, so remove it temporarily. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + + ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); + if (ret) { + pr_err("Failed to allocate pts, err=%d\n", ret); + goto err_alloc_pts; + } + + ret = vm_validate_pt_pd_bos(kvm); + if (ret) { + pr_err("validate_pt_pd_bos() failed\n"); + goto err_alloc_pts; + } + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + return 0; + +err_alloc_pts: + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); + list_del(&bo_va_entry->bo_list); +err_vmadd: + kfree(bo_va_entry); + return ret; +} + +static void remove_bo_from_vm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, unsigned long size) +{ + pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", + entry->va, + entry->va + size, entry); + amdgpu_vm_bo_rmv(adev, entry->bo_va); + list_del(&entry->bo_list); + kfree(entry); +} + +static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *entry = &mem->validate_list; + struct amdgpu_bo *bo = mem->bo; + + INIT_LIST_HEAD(&entry->head); + entry->shared = true; + entry->bo = &bo->tbo; + mutex_lock(&process_info->lock); + list_add_tail(&entry->head, &process_info->kfd_bo_list); + mutex_unlock(&process_info->lock); +} + +/* Reserving a BO and its page table BOs must happen atomically to + * avoid deadlocks. Some operations update multiple VMs at once. Track + * all the reservation info in a context structure. Optionally a sync + * object can track VM updates. + */ +struct bo_vm_reservation_context { + struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ + unsigned int n_vms; /* Number of VMs reserved */ + struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ + struct ww_acquire_ctx ticket; /* Reservation ticket */ + struct list_head list, duplicates; /* BO lists */ + struct amdgpu_sync *sync; /* Pointer to sync object */ + bool reserved; /* Whether BOs are reserved */ +}; + +enum bo_vm_match { + BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ + BO_VM_MAPPED, /* Match VMs where a BO is mapped */ + BO_VM_ALL, /* Match all VMs a BO was added to */ +}; + +/** + * reserve_bo_and_vm - reserve a BO and a VM unconditionally. + * @mem: KFD BO structure. + * @vm: the VM to reserve. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). 
+ */ +static int reserve_bo_and_vm(struct kgd_mem *mem, + struct amdgpu_vm *vm, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + int ret; + + WARN_ON(!vm); + + ctx->reserved = false; + ctx->n_vms = 1; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else { + pr_err("Failed to reserve buffers in ttm\n"); + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * @mem: KFD BO structure. + * @vm: the VM to reserve. If NULL, then all VMs associated with the BO + * is used. Otherwise, a single VM associated with the BO. + * @map_type: the mapping status that will be used to filter the VMs. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + * + * Returns 0 for success, negative for failure. + */ +static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + struct amdgpu_vm *vm, enum bo_vm_match map_type, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + struct kfd_bo_va_list *entry; + unsigned int i; + int ret; + + ctx->reserved = false; + ctx->n_vms = 0; + ctx->vm_pd = NULL; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + ctx->n_vms++; + } + + if (ctx->n_vms != 0) { + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), + GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + } + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + i = 0; + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, + &ctx->vm_pd[i]); + i++; + } + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else + pr_err("Failed to reserve buffers in ttm.\n"); + + if (ret) { + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context + * @ctx: Reservation context to unreserve + * @wait: Optionally wait for a sync object representing pending VM updates + * @intr: Whether the wait is interruptible + * + * Also frees any resources allocated in + * reserve_bo_and_(cond_)vm(s). Returns the status from + * amdgpu_sync_wait. 
+ */ +static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, + bool wait, bool intr) +{ + int ret = 0; + + if (wait) + ret = amdgpu_sync_wait(ctx->sync, intr); + + if (ctx->reserved) + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); + kfree(ctx->vm_pd); + + ctx->sync = NULL; + + ctx->reserved = false; + ctx->vm_pd = NULL; + + return ret; +} + +static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + struct amdgpu_bo_va *bo_va = entry->bo_va; + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base); + struct amdgpu_bo *pd = vm->root.base.bo; + + /* Remove eviction fence from PD (and thereby from PTs too as + * they share the resv. object). Otherwise during PT update + * job (see amdgpu_vm_bo_update_mapping), eviction fence would + * get added to job->sync object and job execution would + * trigger the eviction fence. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + + amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + sync_vm_fence(adev, sync, bo_va->last_pt_update); + + return 0; +} + +static int update_gpuvm_pte(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + int ret; + struct amdgpu_vm *vm; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; + + bo_va = entry->bo_va; + vm = bo_va->base.vm; + bo = bo_va->base.bo; + + /* Update the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) { + pr_err("amdgpu_vm_bo_update failed\n"); + return ret; + } + + return sync_vm_fence(adev, sync, bo_va->last_pt_update); +} + +static int map_bo_to_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) +{ + int ret; + + /* Set virtual address for the allocation */ + ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, + amdgpu_bo_size(entry->bo_va->base.bo), + entry->pte_flags); + if (ret) { + pr_err("Failed to map VA 0x%llx in vm. 
ret %d\n", + entry->va, ret); + return ret; + } + + ret = update_gpuvm_pte(adev, entry, sync); + if (ret) { + pr_err("update_gpuvm_pte() failed\n"); + goto update_gpuvm_pte_failed; + } + + return 0; + +update_gpuvm_pte_failed: + unmap_bo_from_gpuvm(adev, entry, sync); + return ret; +} + +static int process_validate_vms(struct amdkfd_process_info *process_info) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_validate_pt_pd_bos(peer_vm); + if (ret) + return ret; + } + + return 0; +} + +static int process_update_pds(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_update_pds(&peer_vm->base, sync); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef) +{ + int ret; + struct amdkfd_vm *new_vm; + struct amdkfd_process_info *info; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); + if (!new_vm) + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ + ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); + if (ret) { + pr_err("Failed init vm ret %d\n", ret); + goto vm_init_fail; + } + new_vm->adev = adev; + + if (!*process_info) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto alloc_process_info_fail; + } + + mutex_init(&info->lock); + INIT_LIST_HEAD(&info->vm_list_head); + INIT_LIST_HEAD(&info->kfd_bo_list); + + info->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + current->mm); + if (!info->eviction_fence) { + pr_err("Failed to create eviction fence\n"); + goto create_evict_fence_fail; + } + + *process_info = info; + *ef = dma_fence_get(&info->eviction_fence->base); + } + + new_vm->process_info = *process_info; + + mutex_lock(&new_vm->process_info->lock); + list_add_tail(&new_vm->vm_list_node, + &(new_vm->process_info->vm_list_head)); + new_vm->process_info->n_vms++; + mutex_unlock(&new_vm->process_info->lock); + + *vm = (void *) new_vm; + + pr_debug("Created process vm %p\n", *vm); + + return ret; + +create_evict_fence_fail: + mutex_destroy(&info->lock); + kfree(info); +alloc_process_info_fail: + amdgpu_vm_fini(adev, &new_vm->base); +vm_init_fail: + kfree(new_vm); + return ret; + +} + +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; + struct amdgpu_vm *avm = &kfd_vm->base; + struct amdgpu_bo *pd; + struct amdkfd_process_info *process_info; + + if (WARN_ON(!kgd || !vm)) + return; + + pr_debug("Destroying process vm %p\n", vm); + /* Release eviction fence from PD */ + pd = avm->root.base.bo; + amdgpu_bo_reserve(pd, false); + amdgpu_bo_fence(pd, NULL, false); + amdgpu_bo_unreserve(pd); + + process_info = kfd_vm->process_info; + + mutex_lock(&process_info->lock); + process_info->n_vms--; + list_del(&kfd_vm->vm_list_node); + mutex_unlock(&process_info->lock); + + /* Release per-process resources */ + if (!process_info->n_vms) { + WARN_ON(!list_empty(&process_info->kfd_bo_list)); + + dma_fence_put(&process_info->eviction_fence->base); + mutex_destroy(&process_info->lock); + kfree(process_info); + } + + /* Release the VM context */ + 
amdgpu_vm_fini(adev, avm); + kfree(vm); +} + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +{ + struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; + + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; +} + +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + struct amdgpu_bo *bo; + int byte_align; + u32 alloc_domain; + u64 alloc_flags; + uint32_t mapping_flags; + int ret; + + /* + * Check on which domain to allocate BO + */ + if (flags & ALLOC_MEM_FLAGS_VRAM) { + alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : + AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + } else if (flags & ALLOC_MEM_FLAGS_GTT) { + alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else { + return -EINVAL; + } + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); + + /* Workaround for AQL queue wraparound bug. Map the same + * memory twice. That means we only actually allocate half + * the memory. + */ + if ((*mem)->aql_queue) + size = size >> 1; + + /* Workaround for TLB bug on older VI chips */ + byte_align = (adev->family == AMDGPU_FAMILY_VI && + adev->asic_type != CHIP_FIJI && + adev->asic_type != CHIP_POLARIS10 && + adev->asic_type != CHIP_POLARIS11) ? + VI_BO_SIZE_ALIGN : 1; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (flags & ALLOC_MEM_FLAGS_COHERENT) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + (*mem)->mapping_flags = mapping_flags; + + amdgpu_sync_create(&(*mem)->sync); + + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + if (ret) { + pr_debug("Insufficient system memory\n"); + goto err_reserve_system_mem; + } + + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", + va, size, domain_string(alloc_domain)); + + ret = amdgpu_bo_create(adev, size, byte_align, + alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo); + if (ret) { + pr_debug("Failed to create BO on domain %s. 
ret %d\n", + domain_string(alloc_domain), ret); + goto err_bo_create; + } + bo->kfd_bo = *mem; + (*mem)->bo = bo; + + (*mem)->va = va; + (*mem)->domain = alloc_domain; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info); + + if (offset) + *offset = amdgpu_bo_mmap_offset(bo); + + return 0; + +err_bo_create: + unreserve_system_mem_limit(adev, size, alloc_domain); +err_reserve_system_mem: + mutex_destroy(&(*mem)->lock); + kfree(*mem); + return ret; +} + +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem) +{ + struct amdkfd_process_info *process_info = mem->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry, *tmp; + struct bo_vm_reservation_context ctx; + struct ttm_validate_buffer *bo_list_entry; + int ret; + + mutex_lock(&mem->lock); + + if (mem->mapped_to_gpu_memory > 0) { + pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", + mem->va, bo_size); + mutex_unlock(&mem->lock); + return -EBUSY; + } + + mutex_unlock(&mem->lock); + /* lock is not needed after this, since mem is unused and will + * be freed anyway + */ + + /* Make sure restore workers don't access the BO any more */ + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); + + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); + if (unlikely(ret)) + return ret; + + /* The eviction fence should be removed by the last unmap. + * TODO: Log an error condition if the bo still has the eviction fence + * attached + */ + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, + mem->va + bo_size * (1 + mem->aql_queue)); + + /* Remove from VM internal data structures */ + list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) + remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, + entry, bo_size); + + ret = unreserve_bo_and_vms(&ctx, false, false); + + /* Free the sync object */ + amdgpu_sync_free(&mem->sync); + + /* Free the BO*/ + amdgpu_bo_unref(&mem->bo); + mutex_destroy(&mem->lock); + kfree(mem); + + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + int ret; + struct amdgpu_bo *bo; + uint32_t domain; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + struct kfd_bo_va_list *bo_va_entry = NULL; + struct kfd_bo_va_list *bo_va_entry_aql = NULL; + unsigned long bo_size; + + /* Make sure restore is not running concurrently. 
+ */ + mutex_lock(&mem->process_info->lock); + + mutex_lock(&mem->lock); + + bo = mem->bo; + + if (!bo) { + pr_err("Invalid BO when mapping memory to GPU\n"); + ret = -EINVAL; + goto out; + } + + domain = mem->domain; + bo_size = bo->tbo.mem.size; + + pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm, domain_string(domain)); + + ret = reserve_bo_and_vm(mem, vm, &ctx); + if (unlikely(ret)) + goto out; + + if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, + &bo_va_entry); + if (ret) + goto add_bo_to_vm_failed; + if (mem->aql_queue) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, + true, &bo_va_entry_aql); + if (ret) + goto add_bo_to_vm_failed_aql; + } + } else { + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto add_bo_to_vm_failed; + } + + if (mem->mapped_to_gpu_memory == 0) { + /* Validate BO only once. The eviction fence gets added to BO + * the first time it is mapped. Validate will wait for all + * background evictions to complete. + */ + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) { + pr_debug("Validate failed\n"); + goto map_bo_to_gpuvm_failed; + } + } + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && !entry->is_mapped) { + pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", + entry->va, entry->va + bo_size, + entry); + + ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + if (ret) { + pr_err("Failed to map radeon bo to gpuvm\n"); + goto map_bo_to_gpuvm_failed; + } + + ret = vm_update_pds(vm, ctx.sync); + if (ret) { + pr_err("Failed to update page directories\n"); + goto map_bo_to_gpuvm_failed; + } + + entry->is_mapped = true; + mem->mapped_to_gpu_memory++; + pr_debug("\t INC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + amdgpu_bo_fence(bo, + &kfd_vm->process_info->eviction_fence->base, + true); + ret = unreserve_bo_and_vms(&ctx, false, false); + + goto out; + +map_bo_to_gpuvm_failed: + if (bo_va_entry_aql) + remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); +add_bo_to_vm_failed_aql: + if (bo_va_entry) + remove_bo_from_vm(adev, bo_va_entry, bo_size); +add_bo_to_vm_failed: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->process_info->lock); + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_process_info *process_info = + ((struct amdkfd_vm *)vm)->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + int ret; + + mutex_lock(&mem->lock); + + ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); + if (unlikely(ret)) + goto out; + /* If no VMs were reserved, it means the BO wasn't actually mapped */ + if (ctx.n_vms == 0) { + ret = -EINVAL; + goto unreserve_out; + } + + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto unreserve_out; + + pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && entry->is_mapped) { + pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", + entry->va, + entry->va + bo_size, + entry); + 
+ ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); + if (ret == 0) { + entry->is_mapped = false; + } else { + pr_err("failed to unmap VA 0x%llx\n", + mem->va); + goto unreserve_out; + } + + mem->mapped_to_gpu_memory--; + pr_debug("\t DEC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + /* If BO is unmapped from all VMs, unfence it. It can be evicted if + * required. + */ + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + +unreserve_out: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) +{ + struct amdgpu_sync sync; + int ret; + + amdgpu_sync_create(&sync); + + mutex_lock(&mem->lock); + amdgpu_sync_clone(&mem->sync, &sync); + mutex_unlock(&mem->lock); + + ret = amdgpu_sync_wait(&sync, intr); + amdgpu_sync_free(&sync); + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size) +{ + int ret; + struct amdgpu_bo *bo = mem->bo; + + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + pr_err("userptr can't be mapped to kernel\n"); + return -EINVAL; + } + + /* delete kgd_mem from kfd_bo_list to avoid re-validating + * this BO in BO's restoring after eviction. + */ + mutex_lock(&mem->process_info->lock); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("Failed to reserve bo. ret %d\n", ret); + goto bo_reserve_failed; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (ret) { + pr_err("Failed to pin bo. ret %d\n", ret); + goto pin_failed; + } + + ret = amdgpu_bo_kmap(bo, kptr); + if (ret) { + pr_err("Failed to map bo to kernel. ret %d\n", ret); + goto kmap_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, mem->process_info->eviction_fence, NULL, NULL); + list_del_init(&mem->validate_list.head); + + if (size) + *size = amdgpu_bo_size(bo); + + amdgpu_bo_unreserve(bo); + + mutex_unlock(&mem->process_info->lock); + return 0; + +kmap_failed: + amdgpu_bo_unpin(bo); +pin_failed: + amdgpu_bo_unreserve(bo); +bo_reserve_failed: + mutex_unlock(&mem->process_info->lock); + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given + * KFD process identified by process_info + * + * @process_info: amdkfd_process_info of the KFD process + * + * After memory eviction, restore thread calls this function. The function + * should be called when the Process is still valid. BO restore involves - + * + * 1. Release old eviction fence and create new one + * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list. + * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of + * BOs that need to be reserved. + * 4. Reserve all the BOs + * 5. Validate of PD and PT BOs. + * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence + * 7. Add fence to all PD and PT BOs. + * 8. 
Unreserve all BOs + */ +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +{ + struct amdgpu_bo_list_entry *pd_bo_list; + struct amdkfd_process_info *process_info = info; + struct amdkfd_vm *peer_vm; + struct kgd_mem *mem; + struct bo_vm_reservation_context ctx; + struct amdgpu_amdkfd_fence *new_fence; + int ret = 0, i; + struct list_head duplicate_save; + struct amdgpu_sync sync_obj; + + INIT_LIST_HEAD(&duplicate_save); + INIT_LIST_HEAD(&ctx.list); + INIT_LIST_HEAD(&ctx.duplicates); + + pd_bo_list = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list) + return -ENOMEM; + + i = 0; + mutex_lock(&process_info->lock); + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, + &pd_bo_list[i++]); + + /* Reserve all BOs and page tables/directory. Add all BOs from + * kfd_bo_list to ctx.list + */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + list_add_tail(&mem->resv_list.head, &ctx.list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, + false, &duplicate_save); + if (ret) { + pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); + goto ttm_reserve_fail; + } + + amdgpu_sync_create(&sync_obj); + + /* Validate PDs and PTs */ + ret = process_validate_vms(process_info); + if (ret) + goto validate_map_fail; + + /* Wait for PD/PTs validate to finish */ + /* FIXME: I think this isn't needed */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + ttm_bo_wait(&bo->tbo, false, false); + } + + /* Validate BOs and map them to GPUVM (update VM page tables). */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + struct amdgpu_bo *bo = mem->bo; + uint32_t domain = mem->domain; + struct kfd_bo_va_list *bo_va_entry; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); + if (ret) { + pr_debug("Memory eviction: Validate BOs failed. Try again\n"); + goto validate_map_fail; + } + + list_for_each_entry(bo_va_entry, &mem->bo_va_list, + bo_list) { + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, + &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PTE failed. Try again\n"); + goto validate_map_fail; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PDs failed. Try again\n"); + goto validate_map_fail; + } + + amdgpu_sync_wait(&sync_obj, false); + + /* Release old eviction fence and create new one, because fence only + * goes from unsignaled to signaled, fence cannot be reused. + * Use context and mm from the old fence. 
+ */ + new_fence = amdgpu_amdkfd_fence_create( + process_info->eviction_fence->base.context, + process_info->eviction_fence->mm); + if (!new_fence) { + pr_err("Failed to create eviction fence\n"); + ret = -ENOMEM; + goto validate_map_fail; + } + dma_fence_put(&process_info->eviction_fence->base); + process_info->eviction_fence = new_fence; + *ef = dma_fence_get(&new_fence->base); + + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + ttm_bo_wait(&mem->bo->tbo, false, false); + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + amdgpu_bo_fence(mem->bo, + &process_info->eviction_fence->base, true); + + /* Attach eviction fence to PD / PT BOs */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + } + +validate_map_fail: + ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); + amdgpu_sync_free(&sync_obj); +ttm_reserve_fail: + mutex_unlock(&process_info->lock); + kfree(pd_bo_list); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index ff8efd0f8fd5..a0f48cb9b8f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -114,6 +114,9 @@ union igp_info { struct atom_integrated_system_info_v1_11 v11; }; +union umc_info { + struct atom_umc_info_v3_1 v31; +}; /* * Return vram width from integrated system info table, if available, * or 0 if not. @@ -143,6 +146,94 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) return 0; } +static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, + int atom_mem_type) +{ + int vram_type; + + if (adev->flags & AMD_IS_APU) { + switch (atom_mem_type) { + case Ddr2MemType: + case LpDdr2MemType: + vram_type = AMDGPU_VRAM_TYPE_DDR2; + break; + case Ddr3MemType: + case LpDdr3MemType: + vram_type = AMDGPU_VRAM_TYPE_DDR3; + break; + case Ddr4MemType: + case LpDdr4MemType: + vram_type = AMDGPU_VRAM_TYPE_DDR4; + break; + default: + vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + break; + } + } else { + switch (atom_mem_type) { + case ATOM_DGPU_VRAM_TYPE_GDDR5: + vram_type = AMDGPU_VRAM_TYPE_GDDR5; + break; + case ATOM_DGPU_VRAM_TYPE_HBM: + vram_type = AMDGPU_VRAM_TYPE_HBM; + break; + default: + vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + break; + } + } + + return vram_type; +} +/* + * Return vram type from either integrated system info table + * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not + */ +int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union igp_info *igp_info; + union umc_info *umc_info; + u8 frev, crev; + u8 mem_type; + + if (adev->flags & AMD_IS_APU) + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + integratedsysteminfo); + else + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + umc_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, + index, &size, + &frev, &crev, &data_offset)) { + if (adev->flags & AMD_IS_APU) { + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 11: + mem_type = igp_info->v11.memorytype; + return convert_atom_mem_type_to_vram_type(adev, mem_type); + default: + return 
0; + } + } else { + umc_info = (union umc_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 1: + mem_type = umc_info->v31.vram_type; + return convert_atom_mem_type_to_vram_type(adev, mem_type); + default: + return 0; + } + } + } + + return 0; +} + union firmware_info { struct atom_firmware_info_v3_1 v31; }; @@ -151,10 +242,6 @@ union smu_info { struct atom_smu_info_v3_1 v31; }; -union umc_info { - struct atom_umc_info_v3_1 v31; -}; - int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) { struct amdgpu_mode_info *mode_info = &adev->mode_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 288b97e54347..7689c961c4ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -28,6 +28,7 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 2fb299afc12b..02b849be083b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -80,8 +80,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, int time; n = AMDGPU_BENCHMARK_ITERATIONS; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, - NULL, &sobj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0, + ttm_bo_type_kernel, NULL, &sobj); if (r) { goto out_cleanup; } @@ -93,8 +93,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, if (r) { goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, - NULL, &dobj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0, + ttm_bo_type_kernel, NULL, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index dc3360b16bda..37098c68a645 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -24,7 +24,6 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/pci.h> -#include <linux/acpi.h> #include <drm/drmP.h> #include <linux/firmware.h> #include <drm/amdgpu_drm.h> @@ -42,152 +41,6 @@ struct amdgpu_cgs_device { struct amdgpu_device *adev = \ ((struct amdgpu_cgs_device *)cgs_device)->adev -static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device, - int (*call_back_func)(struct amd_pp_init *, void **)) -{ - CGS_FUNC_ADEV; - struct amd_pp_init pp_init; - struct amd_powerplay *amd_pp; - - if (call_back_func == NULL) - return NULL; - - amd_pp = &(adev->powerplay); - pp_init.chip_family = adev->family; - pp_init.chip_id = adev->asic_type; - pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? 
true : false; - pp_init.feature_mask = amdgpu_pp_feature_mask; - pp_init.device = cgs_device; - if (call_back_func(&pp_init, &(amd_pp->pp_handle))) - return NULL; - - return adev->powerplay.pp_handle; -} - -static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, - enum cgs_gpu_mem_type type, - uint64_t size, uint64_t align, - cgs_handle_t *handle) -{ - CGS_FUNC_ADEV; - uint16_t flags = 0; - int ret = 0; - uint32_t domain = 0; - struct amdgpu_bo *obj; - - /* fail if the alignment is not a power of 2 */ - if (((align != 1) && (align & (align - 1))) - || size == 0 || align == 0) - return -EINVAL; - - - switch(type) { - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__VISIBLE_FB: - flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - domain = AMDGPU_GEM_DOMAIN_VRAM; - break; - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: - flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - domain = AMDGPU_GEM_DOMAIN_VRAM; - break; - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: - domain = AMDGPU_GEM_DOMAIN_GTT; - break; - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: - flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - domain = AMDGPU_GEM_DOMAIN_GTT; - break; - default: - return -EINVAL; - } - - - *handle = 0; - - ret = amdgpu_bo_create(adev, size, align, true, domain, flags, - NULL, NULL, &obj); - if (ret) { - DRM_ERROR("(%d) bo create failed\n", ret); - return ret; - } - *handle = (cgs_handle_t)obj; - - return ret; -} - -static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - - if (obj) { - int r = amdgpu_bo_reserve(obj, true); - if (likely(r == 0)) { - amdgpu_bo_kunmap(obj); - amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - } - amdgpu_bo_unref(&obj); - - } - return 0; -} - -static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, - uint64_t *mcaddr) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - - WARN_ON_ONCE(obj->placement.num_placement > 1); - - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, - void **map) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_kmap(obj, map); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - amdgpu_bo_kunmap(obj); - amdgpu_bo_unreserve(obj); - return r; -} static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) { @@ -801,11 +654,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, else strcpy(fw_name, "amdgpu/vega10_smc.bin"); break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_RAVEN: - adev->pm.fw_version = info->version; - return 0; 
default: DRM_ERROR("SMC firmware not supported\n"); return -EINVAL; @@ -857,61 +705,6 @@ static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device) return amdgpu_sriov_vf(adev); } -static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, - struct cgs_system_info *sys_info) -{ - CGS_FUNC_ADEV; - - if (NULL == sys_info) - return -ENODEV; - - if (sizeof(struct cgs_system_info) != sys_info->size) - return -ENODEV; - - switch (sys_info->info_id) { - case CGS_SYSTEM_INFO_ADAPTER_BDF_ID: - sys_info->value = adev->pdev->devfn | (adev->pdev->bus->number << 8); - break; - case CGS_SYSTEM_INFO_PCIE_GEN_INFO: - sys_info->value = adev->pm.pcie_gen_mask; - break; - case CGS_SYSTEM_INFO_PCIE_MLW: - sys_info->value = adev->pm.pcie_mlw_mask; - break; - case CGS_SYSTEM_INFO_PCIE_DEV: - sys_info->value = adev->pdev->device; - break; - case CGS_SYSTEM_INFO_PCIE_REV: - sys_info->value = adev->pdev->revision; - break; - case CGS_SYSTEM_INFO_CG_FLAGS: - sys_info->value = adev->cg_flags; - break; - case CGS_SYSTEM_INFO_PG_FLAGS: - sys_info->value = adev->pg_flags; - break; - case CGS_SYSTEM_INFO_GFX_CU_INFO: - sys_info->value = adev->gfx.cu_info.number; - break; - case CGS_SYSTEM_INFO_GFX_SE_INFO: - sys_info->value = adev->gfx.config.max_shader_engines; - break; - case CGS_SYSTEM_INFO_PCIE_SUB_SYS_ID: - sys_info->value = adev->pdev->subsystem_device; - break; - case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID: - sys_info->value = adev->pdev->subsystem_vendor; - break; - case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN: - sys_info->value = adev->pdev->devfn; - break; - default: - return -ENODEV; - } - - return 0; -} - static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, struct cgs_display_info *info) { @@ -982,235 +775,7 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena return 0; } -/** \brief evaluate acpi namespace object, handle or pathname must be valid - * \param cgs_device - * \param info input/output arguments for the control method - * \return status - */ - -#if defined(CONFIG_ACPI) -static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, - struct cgs_acpi_method_info *info) -{ - CGS_FUNC_ADEV; - acpi_handle handle; - struct acpi_object_list input; - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *params, *obj; - uint8_t name[5] = {'\0'}; - struct cgs_acpi_method_argument *argument; - uint32_t i, count; - acpi_status status; - int result; - - handle = ACPI_HANDLE(&adev->pdev->dev); - if (!handle) - return -ENODEV; - - memset(&input, 0, sizeof(struct acpi_object_list)); - - /* validate input info */ - if (info->size != sizeof(struct cgs_acpi_method_info)) - return -EINVAL; - - input.count = info->input_count; - if (info->input_count > 0) { - if (info->pinput_argument == NULL) - return -EINVAL; - argument = info->pinput_argument; - for (i = 0; i < info->input_count; i++) { - if (((argument->type == ACPI_TYPE_STRING) || - (argument->type == ACPI_TYPE_BUFFER)) && - (argument->pointer == NULL)) - return -EINVAL; - argument++; - } - } - - if (info->output_count > 0) { - if (info->poutput_argument == NULL) - return -EINVAL; - argument = info->poutput_argument; - for (i = 0; i < info->output_count; i++) { - if (((argument->type == ACPI_TYPE_STRING) || - (argument->type == ACPI_TYPE_BUFFER)) - && (argument->pointer == NULL)) - return -EINVAL; - argument++; - } - } - - /* The path name passed to acpi_evaluate_object should be null terminated */ - if ((info->field & CGS_ACPI_FIELD_METHOD_NAME) != 0) { - 
strncpy(name, (char *)&(info->name), sizeof(uint32_t)); - name[4] = '\0'; - } - - /* parse input parameters */ - if (input.count > 0) { - input.pointer = params = - kzalloc(sizeof(union acpi_object) * input.count, GFP_KERNEL); - if (params == NULL) - return -EINVAL; - - argument = info->pinput_argument; - - for (i = 0; i < input.count; i++) { - params->type = argument->type; - switch (params->type) { - case ACPI_TYPE_INTEGER: - params->integer.value = argument->value; - break; - case ACPI_TYPE_STRING: - params->string.length = argument->data_length; - params->string.pointer = argument->pointer; - break; - case ACPI_TYPE_BUFFER: - params->buffer.length = argument->data_length; - params->buffer.pointer = argument->pointer; - break; - default: - break; - } - params++; - argument++; - } - } - - /* parse output info */ - count = info->output_count; - argument = info->poutput_argument; - - /* evaluate the acpi method */ - status = acpi_evaluate_object(handle, name, &input, &output); - - if (ACPI_FAILURE(status)) { - result = -EIO; - goto free_input; - } - - /* return the output info */ - obj = output.pointer; - - if (count > 1) { - if ((obj->type != ACPI_TYPE_PACKAGE) || - (obj->package.count != count)) { - result = -EIO; - goto free_obj; - } - params = obj->package.elements; - } else - params = obj; - - if (params == NULL) { - result = -EIO; - goto free_obj; - } - - for (i = 0; i < count; i++) { - if (argument->type != params->type) { - result = -EIO; - goto free_obj; - } - switch (params->type) { - case ACPI_TYPE_INTEGER: - argument->value = params->integer.value; - break; - case ACPI_TYPE_STRING: - if ((params->string.length != argument->data_length) || - (params->string.pointer == NULL)) { - result = -EIO; - goto free_obj; - } - strncpy(argument->pointer, - params->string.pointer, - params->string.length); - break; - case ACPI_TYPE_BUFFER: - if (params->buffer.pointer == NULL) { - result = -EIO; - goto free_obj; - } - memcpy(argument->pointer, - params->buffer.pointer, - argument->data_length); - break; - default: - break; - } - argument++; - params++; - } - - result = 0; -free_obj: - kfree(obj); -free_input: - kfree((void *)input.pointer); - return result; -} -#else -static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, - struct cgs_acpi_method_info *info) -{ - return -EIO; -} -#endif - -static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device, - uint32_t acpi_method, - uint32_t acpi_function, - void *pinput, void *poutput, - uint32_t output_count, - uint32_t input_size, - uint32_t output_size) -{ - struct cgs_acpi_method_argument acpi_input[2] = { {0}, {0} }; - struct cgs_acpi_method_argument acpi_output = {0}; - struct cgs_acpi_method_info info = {0}; - - acpi_input[0].type = CGS_ACPI_TYPE_INTEGER; - acpi_input[0].data_length = sizeof(uint32_t); - acpi_input[0].value = acpi_function; - - acpi_input[1].type = CGS_ACPI_TYPE_BUFFER; - acpi_input[1].data_length = input_size; - acpi_input[1].pointer = pinput; - - acpi_output.type = CGS_ACPI_TYPE_BUFFER; - acpi_output.data_length = output_size; - acpi_output.pointer = poutput; - - info.size = sizeof(struct cgs_acpi_method_info); - info.field = CGS_ACPI_FIELD_METHOD_NAME | CGS_ACPI_FIELD_INPUT_ARGUMENT_COUNT; - info.input_count = 2; - info.name = acpi_method; - info.pinput_argument = acpi_input; - info.output_count = output_count; - info.poutput_argument = &acpi_output; - - return amdgpu_cgs_acpi_eval_object(cgs_device, &info); -} - -static int amdgpu_cgs_set_temperature_range(struct cgs_device *cgs_device, - int 
min_temperature, - int max_temperature) -{ - CGS_FUNC_ADEV; - - adev->pm.dpm.thermal.min_temp = min_temperature; - adev->pm.dpm.thermal.max_temp = max_temperature; - - return 0; -} - static const struct cgs_ops amdgpu_cgs_ops = { - .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, - .free_gpu_mem = amdgpu_cgs_free_gpu_mem, - .gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem, - .gunmap_gpu_mem = amdgpu_cgs_gunmap_gpu_mem, - .kmap_gpu_mem = amdgpu_cgs_kmap_gpu_mem, - .kunmap_gpu_mem = amdgpu_cgs_kunmap_gpu_mem, .read_register = amdgpu_cgs_read_register, .write_register = amdgpu_cgs_write_register, .read_ind_register = amdgpu_cgs_read_ind_register, @@ -1225,13 +790,9 @@ static const struct cgs_ops amdgpu_cgs_ops = { .set_clockgating_state = amdgpu_cgs_set_clockgating_state, .get_active_displays_info = amdgpu_cgs_get_active_displays_info, .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled, - .call_acpi_method = amdgpu_cgs_call_acpi_method, - .query_system_info = amdgpu_cgs_query_system_info, .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, .enter_safe_mode = amdgpu_cgs_enter_safe_mode, .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, - .register_pp_handle = amdgpu_cgs_register_pp_handle, - .set_temperature_range = amdgpu_cgs_set_temperature_range, }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index ffc1f6f46913..9da8d5802980 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) /* don't do anything if sink is not display port, i.e., * passive dp->(dvi|hdmi) adaptor */ - if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { - int saved_dpms = connector->dpms; - /* Only turn off the display if it's physically disconnected */ - if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { - /* Don't try to start link training before we - * have the dpcd */ - if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) - return; - - /* set it to OFF so that drm_helper_connector_dpms() - * won't return immediately since the current state - * is ON at this point. 
- */ - connector->dpms = DRM_MODE_DPMS_OFF; - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } - connector->dpms = saved_dpms; + if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT && + amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) && + amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { + /* Don't start link training before we have the DPCD */ + if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) + return; + + /* Turn the connector off and back on immediately, which + * will trigger link training + */ + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index eaa3cb0c3ad1..dc34b50e6b29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -346,8 +346,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .allow_reserved_eviction = false, - .resv = bo->tbo.resv + .resv = bo->tbo.resv, + .flags = 0 }; uint32_t domain; int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index ee76b468774a..369beb5041a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -767,10 +767,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) return 0; } +static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + return 0; +} + static const struct drm_info_list amdgpu_debugfs_list[] = { {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump}, {"amdgpu_test_ib", &amdgpu_debugfs_test_ib}, - {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram} + {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}, + {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt}, }; int amdgpu_debugfs_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d2a5f48c5767..690cf77b950e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -87,6 +87,8 @@ static const char *amdgpu_asic_name[] = { "LAST", }; +static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); + bool amdgpu_device_is_px(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; @@ -121,6 +123,32 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, return ret; } +/* + * MMIO register read with bytes helper functions + * @offset:bytes offset from MMIO start + * +*/ + +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) { + if (offset < adev->rmmio_size) + return (readb(adev->rmmio + offset)); + BUG(); +} + +/* + * MMIO register write with bytes helper functions + * @offset:bytes offset from MMIO start + * @value: the value want to be written to the register + * +*/ +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) { + if (offset < adev->rmmio_size) + writeb(value, adev->rmmio + offset); + else + BUG(); +} + + void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { @@ -492,7 +520,7 @@ static int 
amdgpu_device_wb_init(struct amdgpu_device *adev) memset(&adev->wb.used, 0, sizeof(adev->wb.used)); /* clear wb memory */ - memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t)); + memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); } return 0; @@ -530,8 +558,9 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { + wb >>= 3; if (wb < adev->wb.num_wb) - __clear_bit(wb >> 3, adev->wb.used); + __clear_bit(wb, adev->wb.used); } /** @@ -829,6 +858,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); amdgpu_lockup_timeout = 10000; } + + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); } /** @@ -1386,7 +1417,8 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) continue; /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && - adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && + adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_GATE); @@ -1435,7 +1467,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC && + adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* ungate blocks before hw fini so that we can shutdown the blocks safely */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_UNGATE); @@ -1458,11 +1491,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.hw) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - amdgpu_free_static_csa(adev); - amdgpu_device_wb_fini(adev); - amdgpu_device_vram_scratch_fini(adev); - } if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { @@ -1492,6 +1520,13 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + amdgpu_free_static_csa(adev); + amdgpu_device_wb_fini(adev); + amdgpu_device_vram_scratch_fini(adev); + } + r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); /* XXX handle errors */ if (r) { @@ -1542,7 +1577,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.valid) continue; /* ungate blocks so that suspend can properly shut them down */ - if (i != AMD_IP_BLOCK_TYPE_SMC) { + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC && + adev->ip_blocks[i].version->funcs->set_clockgating_state) { r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_UNGATE); if (r) { @@ -1588,6 +1624,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) r = block->version->funcs->hw_init(adev); DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); + if (r) + return r; } } 
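[Editorial aside, not part of the patch] The writeback hunks above clear AMDGPU_MAX_WB * sizeof(uint32_t) * 8 bytes and make amdgpu_device_wb_free() shift the passed offset right by 3 before clearing a bitmap bit. That bookkeeping only balances if each bitmap bit stands for an 8-dword slot and the allocation side hands out bit << 3 as a dword offset; the allocation path is not shown in these hunks, so that half is an assumption. A minimal stand-alone model of the scheme (slot count and names are illustrative, this is not kernel code):

	/* Model of per-slot writeback bookkeeping: one bitmap bit per slot,
	 * eight dwords of buffer per slot, dword offsets handed to callers.
	 */
	#include <stdio.h>
	#include <string.h>

	#define MAX_WB 16                        /* illustrative slot count */

	static unsigned char used[MAX_WB];       /* stand-in for the bitmap */
	static unsigned int wb_buf[MAX_WB * 8];  /* 8 dwords per slot */

	static int wb_get(unsigned int *wb)
	{
		for (unsigned int i = 0; i < MAX_WB; i++) {
			if (!used[i]) {
				used[i] = 1;
				*wb = i << 3;    /* hand back a dword offset (assumed) */
				return 0;
			}
		}
		return -1;
	}

	static void wb_free(unsigned int wb)
	{
		wb >>= 3;                        /* dword offset back to slot index */
		if (wb < MAX_WB)
			used[wb] = 0;
	}

	int main(void)
	{
		unsigned int a, b;

		memset(wb_buf, 0, sizeof(wb_buf)); /* mirrors the *8 sizing above */
		wb_get(&a);
		wb_get(&b);
		printf("dword offsets %u and %u\n", a, b); /* prints 0 and 8 */
		wb_free(a);
		return 0;
	}

Under this model the second slot lands at dword offset 8 while only bit 1 is consumed, which is consistent with both the enlarged memset and the shift in amdgpu_device_wb_free() shown above.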
@@ -1621,6 +1659,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) r = block->version->funcs->hw_init(adev); DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); + if (r) + return r; } } @@ -1871,6 +1911,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->rio_mem == NULL) DRM_INFO("PCI I/O BAR is not found.\n"); + amdgpu_device_get_pcie_info(adev); + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) @@ -2079,6 +2121,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); + amdgpu_pm_sysfs_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_device_ip_fini(adev); if (adev->firmware.gpu_info_fw) { @@ -2107,7 +2150,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); - amdgpu_pm_sysfs_fini(adev); amdgpu_debugfs_regs_cleanup(adev); } @@ -2467,17 +2509,71 @@ err: return r; } +static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_bo *bo, *tmp; + struct dma_fence *fence = NULL, *next = NULL; + long r = 1; + int i = 0; + long tmo; + + if (amdgpu_sriov_runtime(adev)) + tmo = msecs_to_jiffies(amdgpu_lockup_timeout); + else + tmo = msecs_to_jiffies(100); + + DRM_INFO("recover vram bo from shadow start\n"); + mutex_lock(&adev->shadow_list_lock); + list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { + next = NULL; + amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); + if (fence) { + r = dma_fence_wait_timeout(fence, false, tmo); + if (r == 0) + pr_err("wait fence %p[%d] timeout\n", fence, i); + else if (r < 0) + pr_err("wait fence %p[%d] interrupted\n", fence, i); + if (r < 1) { + dma_fence_put(fence); + fence = next; + break; + } + i++; + } + + dma_fence_put(fence); + fence = next; + } + mutex_unlock(&adev->shadow_list_lock); + + if (fence) { + r = dma_fence_wait_timeout(fence, false, tmo); + if (r == 0) + pr_err("wait fence %p[%d] timeout\n", fence, i); + else if (r < 0) + pr_err("wait fence %p[%d] interrupted\n", fence, i); + + } + dma_fence_put(fence); + + if (r > 0) + DRM_INFO("recover vram bo from shadow done\n"); + else + DRM_ERROR("recover vram bo from shadow failed\n"); + + return (r > 0?0:1); +} + /* * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough * * @adev: amdgpu device pointer - * @reset_flags: output param tells caller the reset result * * attempt to do soft-reset or full-reset and reinitialize Asic * return 0 means successed otherwise failed */ -static int amdgpu_device_reset(struct amdgpu_device *adev, - uint64_t* reset_flags) +static int amdgpu_device_reset(struct amdgpu_device *adev) { bool need_full_reset, vram_lost = 0; int r; @@ -2492,7 +2588,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev, DRM_INFO("soft reset failed, will fallback to full reset!\n"); need_full_reset = true; } - } if (need_full_reset) { @@ -2541,13 +2636,8 @@ out: } } - if (reset_flags) { - if (vram_lost) - (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; - - if (need_full_reset) - (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; - } + if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost)) + r = amdgpu_device_handle_vram_lost(adev); return r; } @@ -2556,14 +2646,11 @@ out: * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu device pointer - * @reset_flags: output param tells caller the reset 
result * * do VF FLR and reinitialize Asic * return 0 means successed otherwise failed */ -static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, - uint64_t *reset_flags, - bool from_hypervisor) +static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, bool from_hypervisor) { int r; @@ -2584,28 +2671,20 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); + amdgpu_virt_release_full_gpu(adev, true); if (r) goto error; amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); - if (r) - dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); -error: - /* release full control of GPU after ib test */ - amdgpu_virt_release_full_gpu(adev, true); - - if (reset_flags) { - if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; - atomic_inc(&adev->vram_lost_counter); - } - - /* VF FLR or hotlink reset is always full-reset */ - (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; + if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + atomic_inc(&adev->vram_lost_counter); + r = amdgpu_device_handle_vram_lost(adev); } +error: + return r; } @@ -2623,7 +2702,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force) { struct drm_atomic_state *state = NULL; - uint64_t reset_flags = 0; int i, r, resched; if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { @@ -2645,22 +2723,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + /* store modesetting */ if (amdgpu_device_has_dc_support(adev)) state = drm_atomic_helper_suspend(adev->ddev); - /* block scheduler */ + /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->sched.thread) continue; - /* only focus on the ring hit timeout if &job not NULL */ + kthread_park(ring->sched.thread); + if (job && job->ring->idx != i) continue; - kthread_park(ring->sched.thread); drm_sched_hw_job_reset(&ring->sched, &job->base); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ @@ -2668,68 +2747,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } if (amdgpu_sriov_vf(adev)) - r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true); + r = amdgpu_device_reset_sriov(adev, job ? 
false : true); else - r = amdgpu_device_reset(adev, &reset_flags); - - if (!r) { - if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) || - (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) { - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - struct amdgpu_bo *bo, *tmp; - struct dma_fence *fence = NULL, *next = NULL; - - DRM_INFO("recover vram bo from shadow\n"); - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { - next = NULL; - amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); - if (fence) { - r = dma_fence_wait(fence, false); - if (r) { - WARN(r, "recovery from shadow isn't completed\n"); - break; - } - } - - dma_fence_put(fence); - fence = next; - } - mutex_unlock(&adev->shadow_list_lock); - if (fence) { - r = dma_fence_wait(fence, false); - if (r) - WARN(r, "recovery from shadow isn't completed\n"); - } - dma_fence_put(fence); - } - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + r = amdgpu_device_reset(adev); - if (!ring || !ring->sched.thread) - continue; + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; - /* only focus on the ring hit timeout if &job not NULL */ - if (job && job->ring->idx != i) - continue; + if (!ring || !ring->sched.thread) + continue; + /* only need recovery sched of the given job's ring + * or all rings (in the case @job is NULL) + * after above amdgpu_reset accomplished + */ + if ((!job || job->ring->idx == i) && !r) drm_sched_job_recovery(&ring->sched); - kthread_unpark(ring->sched.thread); - } - } else { - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) - continue; - - /* only focus on the ring hit timeout if &job not NULL */ - if (job && job->ring->idx != i) - continue; - - kthread_unpark(adev->rings[i]->sched.thread); - } + kthread_unpark(ring->sched.thread); } if (amdgpu_device_has_dc_support(adev)) { @@ -2755,7 +2790,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, return r; } -void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) +static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { u32 mask; int ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index bd745a4fae0c..643d008410c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -341,17 +341,9 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_get_power_profile_state(adev, query) \ - ((adev)->powerplay.pp_funcs->get_power_profile_state(\ - (adev)->powerplay.pp_handle, query)) - -#define amdgpu_dpm_set_power_profile_state(adev, request) \ - ((adev)->powerplay.pp_funcs->set_power_profile_state(\ - (adev)->powerplay.pp_handle, request)) - -#define amdgpu_dpm_switch_power_profile(adev, type) \ +#define amdgpu_dpm_switch_power_profile(adev, type, en) \ ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type)) + (adev)->powerplay.pp_handle, type, en)) #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 88ec9280a67a..e6709362994a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -121,7 
+121,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -uint amdgpu_pp_feature_mask = 0x3fff; +uint amdgpu_pp_feature_mask = 0xffffbfff; int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; @@ -284,10 +284,10 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); -MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); -MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable"); +MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); #ifdef CONFIG_DRM_AMDGPU_SI diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 008eaee57114..cf0f186c6092 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -68,17 +68,15 @@ */ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) { - if (adev->dummy_page.page) + struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page; + + if (adev->dummy_page_addr) return 0; - adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); - if (adev->dummy_page.page == NULL) - return -ENOMEM; - adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page, - 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) { + adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; + adev->dummy_page_addr = 0; return -ENOMEM; } return 0; @@ -93,12 +91,11 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) */ static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { - if (adev->dummy_page.page == NULL) + if (!adev->dummy_page_addr) return; - pci_unmap_page(adev->pdev, adev->dummy_page.addr, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; + pci_unmap_page(adev->pdev, adev->dummy_page_addr, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + adev->dummy_page_addr = 0; } /** @@ -116,11 +113,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) int r; if (adev->gart.robj == NULL) { - r = amdgpu_bo_create(adev, adev->gart.table_size, - PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, + r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->gart.robj); + ttm_bo_type_kernel, NULL, + &adev->gart.robj); if (r) { return r; } @@ -236,7 +234,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS adev->gart.pages[p] = NULL; #endif - page_base = adev->dummy_page.addr; + page_base = adev->dummy_page_addr; if (!adev->gart.ptr) continue; @@ -318,7 +316,7 @@ int amdgpu_gart_bind(struct amdgpu_device 
*adev, uint64_t offset, t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) - adev->gart.pages[p] = pagelist[i]; + adev->gart.pages[p] = pagelist ? pagelist[i] : NULL; #endif if (!adev->gart.ptr) @@ -347,7 +345,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev) { int r; - if (adev->dummy_page.page) + if (adev->dummy_page_addr) return 0; /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 55a840ae6d68..46b9ea4e6103 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); if (robj) { - if (robj->gem_base.import_attach) - drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); amdgpu_mn_unregister(robj); amdgpu_bo_unref(&robj); } @@ -45,7 +43,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, - u64 flags, bool kernel, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, struct drm_gem_object **obj) { @@ -59,8 +57,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, } retry: - r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, - flags, NULL, resv, &bo); + r = amdgpu_bo_create(adev, size, alignment, initial_domain, + flags, type, resv, &bo); if (r) { if (r != -ERESTARTSYS) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 8ea342dc6376..311589e02d17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -181,7 +181,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } } - if (ring->funcs->init_cond_exec) + if (job && ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); #ifdef CONFIG_X86_64 @@ -279,11 +279,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev) return r; } - r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo); - if (r) { - return r; - } - adev->ib_pool_ready = true; if (amdgpu_debugfs_sa_init(adev)) { dev_err(adev->dev, "failed to register debugfs file for SA\n"); @@ -302,7 +297,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev) void amdgpu_ib_pool_fini(struct amdgpu_device *adev) { if (adev->ib_pool_ready) { - amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo); amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo); adev->ib_pool_ready = false; } @@ -322,14 +316,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { unsigned i; int r, ret = 0; + long tmo_gfx, tmo_mm; + + tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT; + if (amdgpu_sriov_vf(adev)) { + /* for MM engines in hypervisor side they are not scheduled together + * with CP and SDMA engines, so even in exclusive mode MM engine could + * still running on other VF thus the IB TEST TIMEOUT for MM engines + * under SR-IOV should be set to a long time. 8 sec should be enough + * for the MM comes back to this VF. 
+ */ + tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT; + } + + if (amdgpu_sriov_runtime(adev)) { + /* for CP & SDMA engines since they are scheduled together so + * need to make the timeout width enough to cover the time + * cost waiting for it coming back under RUNTIME only + */ + tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; + } for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; + long tmo; if (!ring || !ring->ready) continue; - r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT); + /* MM engine need more time */ + if (ring->funcs->type == AMDGPU_RING_TYPE_UVD || + ring->funcs->type == AMDGPU_RING_TYPE_VCE || + ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + tmo = tmo_mm; + else + tmo = tmo_gfx; + + r = amdgpu_ring_test_ib(ring, tmo); if (r) { ring->ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index b8a7dba69595..0e01f115bbe5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -25,51 +25,12 @@ #define __AMDGPU_IH_H__ #include <linux/chash.h> +#include "soc15_ih_clientid.h" struct amdgpu_device; - /* - * vega10+ IH clients - */ -enum amdgpu_ih_clientid -{ - AMDGPU_IH_CLIENTID_IH = 0x00, - AMDGPU_IH_CLIENTID_ACP = 0x01, - AMDGPU_IH_CLIENTID_ATHUB = 0x02, - AMDGPU_IH_CLIENTID_BIF = 0x03, - AMDGPU_IH_CLIENTID_DCE = 0x04, - AMDGPU_IH_CLIENTID_ISP = 0x05, - AMDGPU_IH_CLIENTID_PCIE0 = 0x06, - AMDGPU_IH_CLIENTID_RLC = 0x07, - AMDGPU_IH_CLIENTID_SDMA0 = 0x08, - AMDGPU_IH_CLIENTID_SDMA1 = 0x09, - AMDGPU_IH_CLIENTID_SE0SH = 0x0a, - AMDGPU_IH_CLIENTID_SE1SH = 0x0b, - AMDGPU_IH_CLIENTID_SE2SH = 0x0c, - AMDGPU_IH_CLIENTID_SE3SH = 0x0d, - AMDGPU_IH_CLIENTID_SYSHUB = 0x0e, - AMDGPU_IH_CLIENTID_THM = 0x0f, - AMDGPU_IH_CLIENTID_UVD = 0x10, - AMDGPU_IH_CLIENTID_VCE0 = 0x11, - AMDGPU_IH_CLIENTID_VMC = 0x12, - AMDGPU_IH_CLIENTID_XDMA = 0x13, - AMDGPU_IH_CLIENTID_GRBM_CP = 0x14, - AMDGPU_IH_CLIENTID_ATS = 0x15, - AMDGPU_IH_CLIENTID_ROM_SMUIO = 0x16, - AMDGPU_IH_CLIENTID_DF = 0x17, - AMDGPU_IH_CLIENTID_VCE1 = 0x18, - AMDGPU_IH_CLIENTID_PWR = 0x19, - AMDGPU_IH_CLIENTID_UTCL2 = 0x1b, - AMDGPU_IH_CLIENTID_EA = 0x1c, - AMDGPU_IH_CLIENTID_UTCL2LOG = 0x1d, - AMDGPU_IH_CLIENTID_MP0 = 0x1e, - AMDGPU_IH_CLIENTID_MP1 = 0x1f, - - AMDGPU_IH_CLIENTID_MAX, - - AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD -}; #define AMDGPU_IH_CLIENTID_LEGACY 0 +#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX #define AMDGPU_PAGEFAULT_HASH_BITS 8 struct amdgpu_retryfault_hashtable { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index f6f2a662bb8f..11dfe57bd8bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -208,7 +208,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev) r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; - flush_work(&adev->hotplug_work); + if (!amdgpu_device_has_dc_support(adev)) + flush_work(&adev->hotplug_work); cancel_work_sync(&adev->reset_work); return r; } @@ -234,7 +235,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) adev->irq.installed = false; if (adev->irq.msi_enabled) pci_disable_msi(adev->pdev); - flush_work(&adev->hotplug_work); + if (!amdgpu_device_has_dc_support(adev)) + flush_work(&adev->hotplug_work); cancel_work_sync(&adev->reset_work); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d9533bbc467c..d6416ee52e32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -350,6 +350,7 @@ struct amdgpu_mode_info { u16 firmware_flags; /* pointer to backlight encoder */ struct amdgpu_encoder *bl_encoder; + u8 bl_level; /* saved backlight level */ struct amdgpu_audio audio; /* audio stuff */ int num_crtc; /* number of crtcs */ int num_hpd; /* number of hpd pins */ @@ -550,14 +551,6 @@ struct amdgpu_connector { /* we need to mind the EDID between detect and get modes due to analog/digital/tvencoder */ struct edid *edid; - /* number of modes generated from EDID at 'dc_sink' */ - int num_modes; - /* The 'old' sink - before an HPD. - * The 'current' sink is in dc_link->sink. */ - struct dc_sink *dc_sink; - struct dc_link *dc_link; - struct dc_sink *dc_em_sink; - const struct dc_stream *stream; void *con_priv; bool dac_load_detect; bool detected_by_load; /* if the connection status was determined by load */ @@ -568,27 +561,6 @@ struct amdgpu_connector { enum amdgpu_connector_audio audio; enum amdgpu_connector_dither dither; unsigned pixelclock_for_modeset; - - struct drm_dp_mst_topology_mgr mst_mgr; - struct amdgpu_dm_dp_aux dm_dp_aux; - struct drm_dp_mst_port *port; - struct amdgpu_connector *mst_port; - struct amdgpu_encoder *mst_encoder; - struct semaphore mst_sem; - - /* TODO see if we can merge with ddc_bus or make a dm_connector */ - struct amdgpu_i2c_adapter *i2c; - - /* Monitor range limits */ - int min_vfreq ; - int max_vfreq ; - int pixel_clock_mhz; - - /*freesync caps*/ - struct mod_freesync_caps caps; - - struct mutex hpd_lock; - }; /* TODO: start to use this struct and remove same field from base one */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 969de54b62da..6d08cde8443c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -36,6 +36,7 @@ #include <drm/drm_cache.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" static bool amdgpu_need_backup(struct amdgpu_device *adev) { @@ -54,8 +55,13 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + if (bo->kfd_bo) + amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_bo_kunmap(bo); + if (bo->gem_base.import_attach) + drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); if (!list_empty(&bo->shadow_list)) { @@ -169,13 +175,15 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * @size: size for the new BO * @align: alignment for the new BO * @domain: where to place it - * @bo_ptr: resulting BO + * @bo_ptr: used to initialize BOs in structures * @gpu_addr: GPU addr of the pinned BO * @cpu_addr: optional CPU address mapping * * Allocates and pins a BO for kernel internal use, and returns it still * reserved. * + * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. + * * Returns 0 on success, negative error code otherwise. 
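The note added to this kernel-doc states that a new BO is created only when *bo_ptr still points to NULL, so callers can pass the same pointer in repeatedly and reuse the first allocation. A small user-space illustration of that convention, using stand-in types rather than the driver API:

#include <stdio.h>
#include <stdlib.h>

struct buf { size_t size; };

/* Mirrors the documented convention: allocate only if *bufp is NULL,
 * otherwise keep the existing object and just (re)initialize it. */
static int buf_create_reserved(size_t size, struct buf **bufp)
{
        if (!*bufp) {
                *bufp = malloc(sizeof(**bufp));
                if (!*bufp)
                        return -1;
        }
        (*bufp)->size = size;
        return 0;
}

int main(void)
{
        struct buf *b = NULL;

        buf_create_reserved(4096, &b);          /* allocates */
        struct buf *first = b;
        buf_create_reserved(4096, &b);          /* reuses the same object */
        printf("reused: %d\n", b == first);
        free(b);
        return 0;
}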
*/ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, @@ -187,10 +195,10 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, int r; if (!*bo_ptr) { - r = amdgpu_bo_create(adev, size, align, true, domain, + r = amdgpu_bo_create(adev, size, align, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, bo_ptr); + ttm_bo_type_kernel, NULL, bo_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); @@ -238,12 +246,14 @@ error_free: * @size: size for the new BO * @align: alignment for the new BO * @domain: where to place it - * @bo_ptr: resulting BO + * @bo_ptr: used to initialize BOs in structures * @gpu_addr: GPU addr of the pinned BO * @cpu_addr: optional CPU address mapping * * Allocates and pins a BO for kernel internal use. * + * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. + * * Returns 0 on success, negative error code otherwise. */ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, @@ -331,21 +341,19 @@ fail: return false; } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, +static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { - .interruptible = !kernel, + .interruptible = (type != ttm_bo_type_kernel), .no_wait_gpu = false, - .allow_reserved_eviction = true, - .resv = resv + .resv = resv, + .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT }; struct amdgpu_bo *bo; - enum ttm_bo_type type; unsigned long page_align; size_t acc_size; int r; @@ -356,13 +364,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, if (!amdgpu_bo_validate_size(adev, size, domain)) return -ENOMEM; - if (kernel) { - type = ttm_bo_type_kernel; - } else if (sg) { - type = ttm_bo_type_sg; - } else { - type = ttm_bo_type_device; - } *bo_ptr = NULL; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, @@ -381,7 +382,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA); bo->allowed_domains = bo->preferred_domains; - if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) + if (type != ttm_bo_type_kernel && + bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; bo->flags = flags; @@ -418,8 +420,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, amdgpu_ttm_placement_from_domain(bo, domain); r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, - &bo->placement, page_align, &ctx, NULL, - acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + &bo->placement, page_align, &ctx, acc_size, + NULL, resv, &amdgpu_ttm_bo_destroy); if (unlikely(r != 0)) return r; @@ -431,7 +433,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); - if (kernel) + if (type == ttm_bo_type_kernel) bo->tbo.priority = 1; if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && @@ -475,12 +477,11 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, if (bo->shadow) return 0; - r = amdgpu_bo_do_create(adev, size, byte_align, true, - AMDGPU_GEM_DOMAIN_GTT, + r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_SHADOW, - NULL, bo->tbo.resv, - &bo->shadow); + ttm_bo_type_kernel, + bo->tbo.resv, &bo->shadow); 
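The hunks above remove the implicit mapping from the old bool kernel / struct sg_table arguments to a TTM BO type inside amdgpu_bo_do_create; callers now pass enum ttm_bo_type directly. A self-contained sketch of the mapping that was dropped, with local enum values standing in for ttm_bo_type_kernel, ttm_bo_type_sg and ttm_bo_type_device:

#include <stdbool.h>
#include <stdio.h>

enum bo_type { BO_TYPE_DEVICE, BO_TYPE_KERNEL, BO_TYPE_SG };

/* The removed code derived the type from two flags; new call sites
 * simply state the type they want. */
static enum bo_type bo_type_from_flags(bool kernel, bool has_sg)
{
        if (kernel)
                return BO_TYPE_KERNEL;
        if (has_sg)
                return BO_TYPE_SG;
        return BO_TYPE_DEVICE;
}

int main(void)
{
        printf("%d %d %d\n",
               bo_type_from_flags(true, false),         /* kernel BO */
               bo_type_from_flags(false, true),         /* sg-backed BO */
               bo_type_from_flags(false, false));       /* device BO */
        return 0;
}

Stating the type at the call site avoids the ambiguity of a bare true/false plus a possibly NULL sg pointer in the argument list.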
if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -491,18 +492,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } -int amdgpu_bo_create(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, +int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, struct amdgpu_bo **bo_ptr) { uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; - r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain, - parent_flags, sg, resv, bo_ptr); + r = amdgpu_bo_do_create(adev, size, byte_align, domain, + parent_flags, type, resv, bo_ptr); if (r) return r; @@ -817,7 +817,8 @@ static const char *amdgpu_vram_names[] = { "GDDR4", "GDDR5", "HBM", - "DDR3" + "DDR3", + "DDR4", }; int amdgpu_bo_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index c2b02f5c88d2..546f77cb7882 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -92,6 +92,8 @@ struct amdgpu_bo { struct list_head mn_list; struct list_head shadow_list; }; + + struct kgd_mem *kfd_bo; }; static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) @@ -201,12 +203,11 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; } -int amdgpu_bo_create(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, - struct reservation_object *resv, - struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, + struct reservation_object *resv, + struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, @@ -281,8 +282,6 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, struct amdgpu_sa_manager *sa_manager); int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, struct amdgpu_sa_manager *sa_manager); -int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager); int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, struct amdgpu_sa_bo **sa_bo, unsigned size, unsigned align); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 9e73cbcfce44..361975cf45a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -734,161 +734,6 @@ fail: return -EINVAL; } -static ssize_t amdgpu_get_pp_power_profile(struct device *dev, - char *buf, struct amd_pp_profile *query) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - int ret = 0xff; - - if (adev->powerplay.pp_funcs->get_power_profile_state) - ret = amdgpu_dpm_get_power_profile_state( - adev, query); - - if (ret) - return ret; - - return snprintf(buf, PAGE_SIZE, - "%d %d %d %d %d\n", - query->min_sclk / 100, - query->min_mclk / 100, - query->activity_threshold, - query->up_hyst, - query->down_hyst); -} - -static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct amd_pp_profile query = {0}; - - query.type = 
AMD_PP_GFX_PROFILE; - - return amdgpu_get_pp_power_profile(dev, buf, &query); -} - -static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct amd_pp_profile query = {0}; - - query.type = AMD_PP_COMPUTE_PROFILE; - - return amdgpu_get_pp_power_profile(dev, buf, &query); -} - -static ssize_t amdgpu_set_pp_power_profile(struct device *dev, - const char *buf, - size_t count, - struct amd_pp_profile *request) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - uint32_t loop = 0; - char *sub_str, buf_cpy[128], *tmp_str; - const char delimiter[3] = {' ', '\n', '\0'}; - long int value; - int ret = 0xff; - - if (strncmp("reset", buf, strlen("reset")) == 0) { - if (adev->powerplay.pp_funcs->reset_power_profile_state) - ret = amdgpu_dpm_reset_power_profile_state( - adev, request); - if (ret) { - count = -EINVAL; - goto fail; - } - return count; - } - - if (strncmp("set", buf, strlen("set")) == 0) { - if (adev->powerplay.pp_funcs->set_power_profile_state) - ret = amdgpu_dpm_set_power_profile_state( - adev, request); - - if (ret) { - count = -EINVAL; - goto fail; - } - return count; - } - - if (count + 1 >= 128) { - count = -EINVAL; - goto fail; - } - - memcpy(buf_cpy, buf, count + 1); - tmp_str = buf_cpy; - - while (tmp_str[0]) { - sub_str = strsep(&tmp_str, delimiter); - ret = kstrtol(sub_str, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } - - switch (loop) { - case 0: - /* input unit MHz convert to dpm table unit 10KHz*/ - request->min_sclk = (uint32_t)value * 100; - break; - case 1: - /* input unit MHz convert to dpm table unit 10KHz*/ - request->min_mclk = (uint32_t)value * 100; - break; - case 2: - request->activity_threshold = (uint16_t)value; - break; - case 3: - request->up_hyst = (uint8_t)value; - break; - case 4: - request->down_hyst = (uint8_t)value; - break; - default: - break; - } - - loop++; - } - if (adev->powerplay.pp_funcs->set_power_profile_state) - ret = amdgpu_dpm_set_power_profile_state(adev, request); - - if (ret) - count = -EINVAL; - -fail: - return count; -} - -static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct amd_pp_profile request = {0}; - - request.type = AMD_PP_GFX_PROFILE; - - return amdgpu_set_pp_power_profile(dev, buf, count, &request); -} - -static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct amd_pp_profile request = {0}; - - request.type = AMD_PP_COMPUTE_PROFILE; - - return amdgpu_set_pp_power_profile(dev, buf, count, &request); -} - static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -916,12 +761,6 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR, static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR, amdgpu_get_pp_mclk_od, amdgpu_set_pp_mclk_od); -static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR, - amdgpu_get_pp_gfx_power_profile, - amdgpu_set_pp_gfx_power_profile); -static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR, - amdgpu_get_pp_compute_power_profile, - amdgpu_set_pp_compute_power_profile); static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR, amdgpu_get_pp_power_profile_mode, amdgpu_set_pp_power_profile_mode); @@ -1315,7 +1154,7 @@ static umode_t 
hwmon_attributes_visible(struct kobject *kobj, umode_t effective_mode = attr->mode; /* handle non-powerplay limitations */ - if (!adev->powerplay.cgs_device) { + if (!adev->powerplay.pp_handle) { /* Skip fan attributes if fan is not present */ if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr || @@ -1767,21 +1606,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } ret = device_create_file(adev->dev, - &dev_attr_pp_gfx_power_profile); - if (ret) { - DRM_ERROR("failed to create device file " - "pp_gfx_power_profile\n"); - return ret; - } - ret = device_create_file(adev->dev, - &dev_attr_pp_compute_power_profile); - if (ret) { - DRM_ERROR("failed to create device file " - "pp_compute_power_profile\n"); - return ret; - } - - ret = device_create_file(adev->dev, &dev_attr_pp_power_profile_mode); if (ret) { DRM_ERROR("failed to create device file " @@ -1827,10 +1651,6 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_sclk_od); device_remove_file(adev->dev, &dev_attr_pp_mclk_od); device_remove_file(adev->dev, - &dev_attr_pp_gfx_power_profile); - device_remove_file(adev->dev, - &dev_attr_pp_compute_power_profile); - device_remove_file(adev->dev, &dev_attr_pp_power_profile_mode); device_remove_file(adev->dev, &dev_attr_pp_od_clk_voltage); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c deleted file mode 100644 index 5f5aa5fddc16..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ -#include "atom.h" -#include "amdgpu.h" -#include "amd_shared.h" -#include <linux/module.h> -#include <linux/moduleparam.h> -#include "amdgpu_pm.h" -#include <drm/amdgpu_drm.h> -#include "amdgpu_powerplay.h" -#include "si_dpm.h" -#include "cik_dpm.h" -#include "vi_dpm.h" - -static int amdgpu_pp_early_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amd_powerplay *amd_pp; - int ret = 0; - - amd_pp = &(adev->powerplay); - amd_pp->pp_handle = (void *)adev; - - switch (adev->asic_type) { - case CHIP_POLARIS11: - case CHIP_POLARIS10: - case CHIP_POLARIS12: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_TOPAZ: - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_VEGA10: - case CHIP_RAVEN: - amd_pp->cgs_device = amdgpu_cgs_create_device(adev); - amd_pp->ip_funcs = &pp_ip_funcs; - amd_pp->pp_funcs = &pp_dpm_funcs; - break; - /* These chips don't have powerplay implemenations */ -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - amd_pp->ip_funcs = &si_dpm_ip_funcs; - amd_pp->pp_funcs = &si_dpm_funcs; - break; -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - if (amdgpu_dpm == -1) { - amd_pp->ip_funcs = &ci_dpm_ip_funcs; - amd_pp->pp_funcs = &ci_dpm_funcs; - } else { - amd_pp->cgs_device = amdgpu_cgs_create_device(adev); - amd_pp->ip_funcs = &pp_ip_funcs; - amd_pp->pp_funcs = &pp_dpm_funcs; - } - break; - case CHIP_KABINI: - case CHIP_MULLINS: - case CHIP_KAVERI: - amd_pp->ip_funcs = &kv_dpm_ip_funcs; - amd_pp->pp_funcs = &kv_dpm_funcs; - break; -#endif - default: - ret = -EINVAL; - break; - } - - if (adev->powerplay.ip_funcs->early_init) - ret = adev->powerplay.ip_funcs->early_init( - amd_pp->cgs_device ? 
amd_pp->cgs_device : - amd_pp->pp_handle); - - return ret; -} - - -static int amdgpu_pp_late_init(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->late_init) - ret = adev->powerplay.ip_funcs->late_init( - adev->powerplay.pp_handle); - - return ret; -} - -static int amdgpu_pp_sw_init(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->sw_init) - ret = adev->powerplay.ip_funcs->sw_init( - adev->powerplay.pp_handle); - - return ret; -} - -static int amdgpu_pp_sw_fini(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->sw_fini) - ret = adev->powerplay.ip_funcs->sw_fini( - adev->powerplay.pp_handle); - if (ret) - return ret; - - return ret; -} - -static int amdgpu_pp_hw_init(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) - amdgpu_ucode_init_bo(adev); - - if (adev->powerplay.ip_funcs->hw_init) - ret = adev->powerplay.ip_funcs->hw_init( - adev->powerplay.pp_handle); - - return ret; -} - -static int amdgpu_pp_hw_fini(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->hw_fini) - ret = adev->powerplay.ip_funcs->hw_fini( - adev->powerplay.pp_handle); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) - amdgpu_ucode_fini_bo(adev); - - return ret; -} - -static void amdgpu_pp_late_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->late_fini) - adev->powerplay.ip_funcs->late_fini( - adev->powerplay.pp_handle); - - if (adev->powerplay.cgs_device) - amdgpu_cgs_destroy_device(adev->powerplay.cgs_device); -} - -static int amdgpu_pp_suspend(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->suspend) - ret = adev->powerplay.ip_funcs->suspend( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_resume(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->resume) - ret = adev->powerplay.ip_funcs->resume( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_set_clockgating_state(void *handle, - enum amd_clockgating_state state) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->set_clockgating_state) - ret = adev->powerplay.ip_funcs->set_clockgating_state( - adev->powerplay.pp_handle, state); - return ret; -} - -static int amdgpu_pp_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->set_powergating_state) - ret = adev->powerplay.ip_funcs->set_powergating_state( - adev->powerplay.pp_handle, state); - return ret; -} - - -static bool amdgpu_pp_is_idle(void *handle) -{ - bool ret = true; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->is_idle) - ret = adev->powerplay.ip_funcs->is_idle( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_wait_for_idle(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if 
(adev->powerplay.ip_funcs->wait_for_idle) - ret = adev->powerplay.ip_funcs->wait_for_idle( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_soft_reset(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->soft_reset) - ret = adev->powerplay.ip_funcs->soft_reset( - adev->powerplay.pp_handle); - return ret; -} - -static const struct amd_ip_funcs amdgpu_pp_ip_funcs = { - .name = "amdgpu_powerplay", - .early_init = amdgpu_pp_early_init, - .late_init = amdgpu_pp_late_init, - .sw_init = amdgpu_pp_sw_init, - .sw_fini = amdgpu_pp_sw_fini, - .hw_init = amdgpu_pp_hw_init, - .hw_fini = amdgpu_pp_hw_fini, - .late_fini = amdgpu_pp_late_fini, - .suspend = amdgpu_pp_suspend, - .resume = amdgpu_pp_resume, - .is_idle = amdgpu_pp_is_idle, - .wait_for_idle = amdgpu_pp_wait_for_idle, - .soft_reset = amdgpu_pp_soft_reset, - .set_clockgating_state = amdgpu_pp_set_clockgating_state, - .set_powergating_state = amdgpu_pp_set_powergating_state, -}; - -const struct amdgpu_ip_block_version amdgpu_pp_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_SMC, - .major = 1, - .minor = 0, - .rev = 0, - .funcs = &amdgpu_pp_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h deleted file mode 100644 index c0c4bfdcdb14..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: AMD - * - */ - -#ifndef __AMDGPU_POWERPLAY_H__ -#define __AMDGPU_POWERPLAY_H__ - -#include "amd_shared.h" - -extern const struct amdgpu_ip_block_version amdgpu_pp_ip_block; - -#endif /* __AMDGPU_POWERPLAY_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 8ce74a1d9966..1c9991738477 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -105,14 +105,25 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, int ret; ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, &bo); - ww_mutex_unlock(&resv->lock); + ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg, + resv, &bo); if (ret) - return ERR_PTR(ret); + goto error; + + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) + bo->prime_shared_count = 1; - bo->prime_shared_count = 1; + ww_mutex_unlock(&resv->lock); return &bo->gem_base; + +error: + ww_mutex_unlock(&resv->lock); + return ERR_PTR(ret); } static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 6e712f12eecd..9a75410cd576 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -62,6 +62,9 @@ static int psp_sw_init(void *handle) psp->adev = adev; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + ret = psp_init_microcode(psp); if (ret) { DRM_ERROR("Failed to load psp firmware!\n"); @@ -75,6 +78,9 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + release_firmware(adev->psp.sos_fw); adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); @@ -453,6 +459,9 @@ static int psp_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index e223b0f6417b..d5f526f38e50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -484,7 +484,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, result = 0; if (*pos < 12) { - early[0] = amdgpu_ring_get_rptr(ring); + early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; early[2] = ring->wptr & ring->buf_mask; for (i = *pos / 4; i < 3 && size; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1d0d250cbfdf..1a5911882657 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -26,6 +26,7 @@ #include <drm/amdgpu_drm.h> #include <drm/gpu_scheduler.h> +#include <drm/drm_print.h> /* max number of rings */ #define AMDGPU_MAX_RINGS 18 @@ -35,8 +36,9 @@ #define AMDGPU_MAX_UVD_ENC_RINGS 2 /* some special values for the owner field */ -#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) -#define AMDGPU_FENCE_OWNER_VM 
((void*)1ul) +#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) +#define AMDGPU_FENCE_OWNER_VM ((void *)1ul) +#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul) #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 5ca75a456ad2..fb1667b35daa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -63,21 +63,27 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) INIT_LIST_HEAD(&sa_manager->flist[i]); - r = amdgpu_bo_create(adev, size, align, true, domain, - 0, NULL, NULL, &sa_manager->bo); + r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo, + &sa_manager->gpu_addr, &sa_manager->cpu_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); return r; } + memset(sa_manager->cpu_ptr, 0, sa_manager->size); return r; } void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager) + struct amdgpu_sa_manager *sa_manager) { struct amdgpu_sa_bo *sa_bo, *tmp; + if (sa_manager->bo == NULL) { + dev_err(adev->dev, "no bo for sa manager\n"); + return; + } + if (!list_empty(&sa_manager->olist)) { sa_manager->hole = &sa_manager->olist, amdgpu_sa_bo_try_free(sa_manager); @@ -88,55 +94,9 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) { amdgpu_sa_bo_remove_locked(sa_bo); } - amdgpu_bo_unref(&sa_manager->bo); - sa_manager->size = 0; -} - -int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager) -{ - int r; - - if (sa_manager->bo == NULL) { - dev_err(adev->dev, "no bo for sa manager\n"); - return -EINVAL; - } - /* map the buffer */ - r = amdgpu_bo_reserve(sa_manager->bo, false); - if (r) { - dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r); - return r; - } - r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr); - if (r) { - amdgpu_bo_unreserve(sa_manager->bo); - dev_err(adev->dev, "(%d) failed to pin manager bo\n", r); - return r; - } - r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr); - memset(sa_manager->cpu_ptr, 0, sa_manager->size); - amdgpu_bo_unreserve(sa_manager->bo); - return r; -} - -int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager) -{ - int r; - - if (sa_manager->bo == NULL) { - dev_err(adev->dev, "no bo for sa manager\n"); - return -EINVAL; - } - - r = amdgpu_bo_reserve(sa_manager->bo, true); - if (!r) { - amdgpu_bo_kunmap(sa_manager->bo); - amdgpu_bo_unpin(sa_manager->bo); - amdgpu_bo_unreserve(sa_manager->bo); - } - return r; + amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr); + sa_manager->size = 0; } static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index df65c66dc956..2d6f5ec77a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -31,6 +31,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" struct amdgpu_sync_entry { struct hlist_node node; @@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, */ static void *amdgpu_sync_get_owner(struct dma_fence *f) { - struct drm_sched_fence *s_fence = to_drm_sched_fence(f); + 
struct drm_sched_fence *s_fence; + struct amdgpu_amdkfd_fence *kfd_fence; + + if (!f) + return AMDGPU_FENCE_OWNER_UNDEFINED; + s_fence = to_drm_sched_fence(f); if (s_fence) return s_fence->owner; + kfd_fence = to_amdgpu_amdkfd_fence(f); + if (kfd_fence) + return AMDGPU_FENCE_OWNER_KFD; + return AMDGPU_FENCE_OWNER_UNDEFINED; } @@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], reservation_object_held(resv)); + /* We only want to trigger KFD eviction fences on + * evict or move jobs. Skip KFD fences otherwise. + */ + fence_owner = amdgpu_sync_get_owner(f); + if (fence_owner == AMDGPU_FENCE_OWNER_KFD && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + continue; + if (amdgpu_sync_same_dev(adev, f)) { /* VM updates are only interesting * for other VM updates and moves. */ - fence_owner = amdgpu_sync_get_owner(f); if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && ((owner == AMDGPU_FENCE_OWNER_VM) != @@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit return NULL; } +/** + * amdgpu_sync_clone - clone a sync object + * + * @source: sync object to clone + * @clone: pointer to destination sync object + * + * Adds references to all unsignaled fences in @source to @clone. Also + * removes signaled fences from @source while at it. + */ +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + struct dma_fence *f; + int i, r; + + hash_for_each_safe(source->fences, i, tmp, e, node) { + f = e->fence; + if (!dma_fence_is_signaled(f)) { + r = amdgpu_sync_fence(NULL, clone, f, e->explicit); + if (r) + return r; + } else { + hash_del(&e->node); + dma_fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + } + } + + dma_fence_put(clone->last_vm_update); + clone->last_vm_update = dma_fence_get(source->last_vm_update); + + return 0; +} + int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) { struct amdgpu_sync_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7aba38d5c9df..10cf23a57f17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index f3d81b6fb499..2dbe87591f81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -59,9 +59,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, 0, - NULL, NULL, &vram_obj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0, + ttm_bo_type_kernel, NULL, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -80,9 +79,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) void **vram_start, **vram_end; struct 
dma_fence *fence = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, gtt_obj + i); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, 0, + ttm_bo_type_kernel, NULL, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b372d8d650a5..e28b73609fbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -46,6 +46,7 @@ #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -203,6 +204,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM }; + if (bo->type == ttm_bo_type_sg) { + placement->num_placement = 0; + placement->num_busy_placement = 0; + return; + } + if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; placement->busy_placement = &placements; @@ -213,9 +220,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: - if (adev->mman.buffer_funcs && - adev->mman.buffer_funcs_ring && - adev->mman.buffer_funcs_ring->ready == false) { + if (!adev->mman.buffer_funcs_enabled) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { @@ -260,6 +265,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); + /* + * Don't verify access for KFD BOs. They don't have a GEM + * object associated with them. + */ + if (abo->kfd_bo) + return 0; + if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; return drm_vma_node_verify_access(&abo->gem_base.vma_node, @@ -331,7 +343,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE); - if (!ring->ready) { + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -577,12 +589,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, amdgpu_move_null(bo, new_mem); return 0; } - if (adev->mman.buffer_funcs == NULL || - adev->mman.buffer_funcs_ring == NULL || - !adev->mman.buffer_funcs_ring->ready) { - /* use memcpy */ + + if (!adev->mman.buffer_funcs_enabled) goto memcpy; - } if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { @@ -621,6 +630,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ { struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); + struct drm_mm_node *mm_node = mem->mm_node; mem->bus.addr = NULL; mem->bus.offset = 0; @@ -640,6 +650,15 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ /* check if it's visible */ if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size) return -EINVAL; + /* Only physically contiguous buffers apply. In a contiguous + * buffer, size of the first mm_node would match the number of + * pages in ttm_mem_reg. 
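The comment above describes the contiguity test: a placement counts as physically contiguous only when its first allocator node already covers every page of the ttm_mem_reg, and only then can a CPU address be derived from the pre-mapped aperture. A compact sketch of that check, with local structs standing in for drm_mm_node and ttm_mem_reg:

#include <stdint.h>
#include <stdio.h>

struct mm_node { unsigned long size; };         /* pages in the first node */
struct mem_reg { unsigned long num_pages; uint64_t bus_offset; };

/* Returns a CPU address for a contiguous placement, NULL otherwise. */
static void *vram_cpu_addr(uint8_t *aper_base_kaddr,
                           const struct mm_node *node,
                           const struct mem_reg *mem)
{
        if (!aper_base_kaddr)
                return NULL;                    /* aperture not ioremapped */
        if (node->size != mem->num_pages)
                return NULL;                    /* placement is scattered */
        return aper_base_kaddr + mem->bus_offset;
}

int main(void)
{
        uint8_t fake_aperture[64];
        struct mm_node node = { .size = 4 };
        struct mem_reg mem  = { .num_pages = 4, .bus_offset = 8 };

        printf("%p\n", vram_cpu_addr(fake_aperture, &node, &mem));
        return 0;
}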
+ */ + if (adev->mman.aper_base_kaddr && + (mm_node->size == mem->num_pages)) + mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + + mem->bus.offset; + mem->bus.base = adev->gmc.aper_base; mem->bus.is_iomem = true; break; @@ -674,7 +693,6 @@ struct amdgpu_ttm_gup_task_list { struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; - struct amdgpu_device *adev; u64 offset; uint64_t userptr; struct mm_struct *usermm; @@ -832,6 +850,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void*)ttm; uint64_t flags; int r = 0; @@ -858,9 +877,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return 0; } - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); + flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; - r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); if (r) @@ -937,6 +956,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; @@ -947,7 +967,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); + r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); if (r) DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", gtt->ttm.ttm.num_pages, gtt->offset); @@ -968,22 +988,20 @@ static struct ttm_backend_func amdgpu_backend_func = { .destroy = &amdgpu_ttm_backend_destroy, }; -static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, - unsigned long size, uint32_t page_flags, - struct page *dummy_read_page) +static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, + uint32_t page_flags) { struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt; - adev = amdgpu_ttm_adev(bdev); + adev = amdgpu_ttm_adev(bo->bdev); gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; - gtt->adev = adev; - if (ttm_dma_tt_init(>t->ttm, bdev, size, page_flags, dummy_read_page)) { + if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { kfree(gtt); return NULL; } @@ -1009,7 +1027,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, if (slave && ttm->sg) { drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, - gtt->ttm.dma_address, ttm->num_pages); + gtt->ttm.dma_address, + ttm->num_pages); ttm->state = tt_unbound; return 0; } @@ -1167,6 +1186,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; + struct reservation_object_list *flist; + struct dma_fence *f; + int i; + + /* If bo is a KFD BO, check if the bo belongs to the current process. 
+ * If true, then return false as any KFD process needs all its BOs to + * be resident to run successfully + */ + flist = reservation_object_get_list(bo->resv); + if (flist) { + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(bo->resv)); + if (amdkfd_fence_check_mm(f, current->mm)) + return false; + } + } switch (bo->mem.mem_type) { case TTM_PL_TT: @@ -1306,11 +1342,12 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) if (adev->fw_vram_usage.size > 0 && adev->fw_vram_usage.size <= vram_size) { - r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, - PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, - &adev->fw_vram_usage.reserved_bo); + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + ttm_bo_type_kernel, NULL, + &adev->fw_vram_usage.reserved_bo); if (r) goto error_create; @@ -1402,7 +1439,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->gmc.visible_vram_size = vis_vram_limit; /* Change the size here instead of the init above so only lpfn is affected */ - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); +#ifdef CONFIG_64BIT + adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, + adev->gmc.visible_vram_size); +#endif /* *The reserved vram for firmware must be pinned to the specified @@ -1495,6 +1536,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_debugfs_fini(adev); amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) + iounmap(adev->mman.aper_base_kaddr); + adev->mman.aper_base_kaddr = NULL; ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); @@ -1510,18 +1554,30 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: ttm finalized\n"); } -/* this should only be called at bootup or when userspace - * isn't running */ -void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size) +/** + * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions + * + * @adev: amdgpu_device pointer + * @enable: true when we can use buffer functions. + * + * Enable/disable use of buffer functions during suspend/resume. This should + * only be called at bootup or when userspace isn't running. 
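Several hunks in this file replace per-ring readiness checks with the single adev->mman.buffer_funcs_enabled flag that the function documented above toggles. A small sketch of the resulting caller pattern, with the flag and both copy paths reduced to stand-ins (the real code either uses the SDMA buffer functions or falls back to the memcpy path shown in the bo_move hunk):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool buffer_funcs_enabled;       /* toggled at init/suspend/resume */

static void copy_with_sdma(void *dst, const void *src, size_t n)
{
        memcpy(dst, src, n);            /* placeholder for the GPU copy */
        puts("copied via SDMA path");
}

static void move_buffer(void *dst, const void *src, size_t n)
{
        if (!buffer_funcs_enabled) {
                memcpy(dst, src, n);    /* CPU fallback */
                puts("copied via memcpy fallback");
                return;
        }
        copy_with_sdma(dst, src, n);
}

int main(void)
{
        char a[8] = "abcdefg", b[8] = "";

        move_buffer(b, a, sizeof(a));   /* flag still false: fallback */
        buffer_funcs_enabled = true;
        move_buffer(b, a, sizeof(a));   /* flag set: hardware path */
        return 0;
}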
+ */ +void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) { - struct ttm_mem_type_manager *man; + struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM]; + uint64_t size; - if (!adev->mman.initialized) + if (!adev->mman.initialized || adev->in_gpu_reset) return; - man = &adev->mman.bdev.man[TTM_PL_VRAM]; /* this just adjusts TTM size idea, which sets lpfn to the correct value */ + if (enable) + size = adev->gmc.real_vram_size; + else + size = adev->gmc.visible_vram_size; man->size = size >> PAGE_SHIFT; + adev->mman.buffer_funcs_enabled = enable; } int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1620,6 +1676,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; + if (direct_submit && !ring->ready) { + DRM_ERROR("Trying to move memory with ring turned off.\n"); + return -EINVAL; + } + max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; @@ -1693,7 +1754,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct amdgpu_job *job; int r; - if (!ring->ready) { + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to clear memory with ring turned off.\n"); return -EINVAL; } @@ -1929,38 +1990,98 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif -static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) +static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) { struct amdgpu_device *adev = file_inode(f)->i_private; - int r; - uint64_t phys; struct iommu_domain *dom; + ssize_t result = 0; + int r; - // always return 8 bytes - if (size != 8) - return -EINVAL; + dom = iommu_get_domain_for_dev(adev->dev); - // only accept page addresses - if (*pos & 0xFFF) - return -EINVAL; + while (size) { + phys_addr_t addr = *pos & PAGE_MASK; + loff_t off = *pos & ~PAGE_MASK; + size_t bytes = PAGE_SIZE - off; + unsigned long pfn; + struct page *p; + void *ptr; + + bytes = bytes < size ? bytes : size; + + addr = dom ? iommu_iova_to_phys(dom, addr) : addr; + + pfn = addr >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -EPERM; + + p = pfn_to_page(pfn); + if (p->mapping != adev->mman.bdev.dev_mapping) + return -EPERM; + + ptr = kmap(p); + r = copy_to_user(buf, ptr, bytes); + kunmap(p); + if (r) + return -EFAULT; + + size -= bytes; + *pos += bytes; + result += bytes; + } + + return result; +} + +static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + struct iommu_domain *dom; + ssize_t result = 0; + int r; dom = iommu_get_domain_for_dev(adev->dev); - if (dom) - phys = iommu_iova_to_phys(dom, *pos); - else - phys = *pos; - r = copy_to_user(buf, &phys, 8); - if (r) - return -EFAULT; + while (size) { + phys_addr_t addr = *pos & PAGE_MASK; + loff_t off = *pos & ~PAGE_MASK; + size_t bytes = PAGE_SIZE - off; + unsigned long pfn; + struct page *p; + void *ptr; + + bytes = bytes < size ? bytes : size; - return 8; + addr = dom ? 
iommu_iova_to_phys(dom, addr) : addr; + + pfn = addr >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -EPERM; + + p = pfn_to_page(pfn); + if (p->mapping != adev->mman.bdev.dev_mapping) + return -EPERM; + + ptr = kmap(p); + r = copy_from_user(ptr, buf, bytes); + kunmap(p); + if (r) + return -EFAULT; + + size -= bytes; + *pos += bytes; + result += bytes; + } + + return result; } -static const struct file_operations amdgpu_ttm_iova_fops = { +static const struct file_operations amdgpu_ttm_iomem_fops = { .owner = THIS_MODULE, - .read = amdgpu_iova_to_phys_read, + .read = amdgpu_iomem_read, + .write = amdgpu_iomem_write, .llseek = default_llseek }; @@ -1973,7 +2094,7 @@ static const struct { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT }, #endif - { "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM }, + { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM }, }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 1e275c7b006b..6ea7de863041 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -44,6 +44,7 @@ struct amdgpu_mman { struct ttm_bo_device bdev; bool mem_global_referenced; bool initialized; + void __iomem *aper_base_kaddr; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_entries[8]; @@ -52,6 +53,7 @@ struct amdgpu_mman { /* buffer handling */ const struct amdgpu_buffer_funcs *buffer_funcs; struct amdgpu_ring *buffer_funcs_ring; + bool buffer_funcs_enabled; struct mutex gtt_window_lock; /* Scheduler entity for buffer moves */ @@ -74,6 +76,11 @@ int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); +int amdgpu_ttm_init(struct amdgpu_device *adev); +void amdgpu_ttm_fini(struct amdgpu_device *adev); +void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, + bool enable); + int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 9cd5517a4fa9..f3c459b7c0bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -299,12 +299,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) cancel_delayed_work_sync(&adev->uvd.idle_work); - for (i = 0; i < adev->uvd.max_handles; ++i) - if (atomic_read(&adev->uvd.handles[i])) - break; + /* only valid for physical mode */ + if (adev->asic_type < CHIP_POLARIS10) { + for (i = 0; i < adev->uvd.max_handles; ++i) + if (atomic_read(&adev->uvd.handles[i])) + break; - if (i == AMDGPU_MAX_UVD_HANDLES) - return 0; + if (i == adev->uvd.max_handles) + return 0; + } size = amdgpu_bo_size(adev->uvd.vcpu_bo); ptr = adev->uvd.cpu_addr; @@ -1116,9 +1119,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, uvd.idle_work.work); unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); - if (amdgpu_sriov_vf(adev)) - return; - if (fences == 0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); @@ -1138,11 +1138,12 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); + bool set_clocks; 
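The amdgpu_iomem debugfs handlers introduced above walk each request one page at a time: clamp the chunk to the end of the current page, translate the IOVA through the IOMMU when a domain exists, validate the pfn and its mapping, then kmap and copy. A user-space sketch of just the page-chunking arithmetic (the translation and validation steps are omitted; PAGE_SIZE is a local stand-in):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Split [pos, pos + size) into page-bounded chunks, as the handlers do. */
static void walk_pages(uint64_t pos, size_t size)
{
        while (size) {
                uint64_t page  = pos & PAGE_MASK;
                uint64_t off   = pos & ~PAGE_MASK;
                size_t   bytes = PAGE_SIZE - off;

                if (bytes > size)
                        bytes = size;

                printf("page 0x%llx, offset %llu, %zu bytes\n",
                       (unsigned long long)page,
                       (unsigned long long)off, bytes);

                pos  += bytes;
                size -= bytes;
        }
}

int main(void)
{
        walk_pages(4090, 20);   /* request crossing one page boundary */
        return 0;
}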
if (amdgpu_sriov_vf(adev)) return; + set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); if (set_clocks) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, true); @@ -1158,7 +1159,8 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) { - schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); + if (!amdgpu_sriov_vf(ring->adev)) + schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index d274ae535530..9152478d7528 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -300,9 +300,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, vce.idle_work.work); unsigned i, count = 0; - if (amdgpu_sriov_vf(adev)) - return; - for (i = 0; i < adev->vce.num_rings; i++) count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); @@ -362,7 +359,8 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) */ void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) { - schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); + if (!amdgpu_sriov_vf(ring->adev)) + schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index b832651d2137..21adb1b6e5cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,7 +22,9 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define MAX_KIQ_REG_TRY 20 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { @@ -137,9 +139,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { - signed long r; + signed long r, cnt = 0; unsigned long flags; - uint32_t val, seq; + uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; @@ -153,18 +155,39 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld\n", r); - return ~0; + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. 
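A compact sketch of the bounded retry scheme the reworked KIQ register helpers use: one short poll, then up to MAX_KIQ_REG_TRY further polls separated by a bail-out sleep, giving up instead of blocking forever during a GPU reset. The poll is simulated here; the driver calls amdgpu_fence_wait_polling and msleep, and also bails out early from interrupt context:

#include <stdbool.h>
#include <stdio.h>

#define MAX_KIQ_REG_TRY 20

/* Stand-in for amdgpu_fence_wait_polling(): signals on the Nth attempt. */
static long poll_fence(int *calls, int succeed_after)
{
        return ++(*calls) >= succeed_after ? 1 : 0;
}

static bool kiq_access(bool in_gpu_reset, int succeed_after)
{
        int calls = 0, cnt = 0;
        long r = poll_fence(&calls, succeed_after);

        if (r < 1 && in_gpu_reset)
                return false;           /* never block gpu_recover() */

        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
                /* msleep(MAX_KIQ_REG_BAILOUT_INTERVAL) in the driver */
                r = poll_fence(&calls, succeed_after);
        }

        return cnt <= MAX_KIQ_REG_TRY && r >= 1;
}

int main(void)
{
        printf("signals after 3 polls: %d\n", kiq_access(false, 3));
        printf("never signals:         %d\n", kiq_access(false, 1000));
        printf("during GPU reset:      %d\n", kiq_access(true, 1000));
        return 0;
}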
+ * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_read; + + if (in_interrupt()) + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } - val = adev->wb.wb[adev->virt.reg_val_offs]; - return val; + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_read; + + return adev->wb.wb[adev->virt.reg_val_offs]; + +failed_kiq_read: + pr_err("failed to read reg:%x\n", reg); + return ~0; } void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { - signed long r; + signed long r, cnt = 0; unsigned long flags; uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -180,8 +203,34 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - if (r < 1) - DRM_ERROR("wait for kiq fence error: %ld\n", r); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_write; + + if (in_interrupt()) + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_write; + + return; + +failed_kiq_write: + pr_err("failed to write reg:%x\n", reg); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0b237e027cab..24474294c92a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -413,9 +413,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (!entry->base.bo) { r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), - AMDGPU_GPU_PAGE_SIZE, true, + AMDGPU_GPU_PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, flags, - NULL, resv, &pt); + ttm_bo_type_kernel, resv, &pt); if (r) return r; @@ -2409,8 +2409,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_GEM_CREATE_SHADOW); size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - flags, NULL, NULL, &vm->root.base.bo); + r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, + ttm_bo_type_kernel, NULL, &vm->root.base.bo); if (r) goto error_free_sched_entity; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fabf44b262be..e9841518343e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -28,6 +28,7 @@ #include <linux/kfifo.h> #include <linux/rbtree.h> #include <drm/gpu_scheduler.h> +#include <drm/drm_file.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 2af26d2da127..d702fb8e3427 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -34,7 +34,7 @@ #include <linux/backlight.h> #include "bif/bif_4_1_d.h" -static u8 +u8 
amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) { u8 backlight_level; @@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) return backlight_level; } -static void +void amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev, u8 backlight_level) { diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h index 2bdec40515ce..f77cbdef679e 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h @@ -25,6 +25,11 @@ #define __ATOMBIOS_ENCODER_H__ u8 +amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev); +void +amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev, + u8 backlight_level); +u8 amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder); void amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder, diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index f82f40fb3bea..98d1dd253596 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -65,6 +65,8 @@ MODULE_FIRMWARE("radeon/hawaii_k_smc.bin"); #define VOLTAGE_VID_OFFSET_SCALE1 625 #define VOLTAGE_VID_OFFSET_SCALE2 100 +static const struct amd_pm_funcs ci_dpm_funcs; + static const struct ci_pt_defaults defaults_hawaii_xt = { 1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000, @@ -3695,40 +3697,6 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table, return ret; } -static void ci_save_default_power_profile(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct SMU7_Discrete_GraphicsLevel *levels = - pi->smc_state_table.GraphicsLevel; - uint32_t min_level = 0; - - pi->default_gfx_power_profile.activity_threshold = - be16_to_cpu(levels[0].ActivityLevel); - pi->default_gfx_power_profile.up_hyst = levels[0].UpH; - pi->default_gfx_power_profile.down_hyst = levels[0].DownH; - pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; - - pi->default_compute_power_profile = pi->default_gfx_power_profile; - pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; - - /* Optimize compute power profile: Use only highest - * 2 power levels (if more than 2 are available), Hysteresis: - * 0ms up, 5ms down - */ - if (pi->smc_state_table.GraphicsDpmLevelCount > 2) - min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2; - else if (pi->smc_state_table.GraphicsDpmLevelCount == 2) - min_level = 1; - pi->default_compute_power_profile.min_sclk = - be32_to_cpu(levels[min_level].SclkFrequency); - - pi->default_compute_power_profile.up_hyst = 0; - pi->default_compute_power_profile.down_hyst = 5; - - pi->gfx_power_profile = pi->default_gfx_power_profile; - pi->compute_power_profile = pi->default_compute_power_profile; -} - static int ci_init_smc_table(struct amdgpu_device *adev) { struct ci_power_info *pi = ci_get_pi(adev); @@ -3874,8 +3842,6 @@ static int ci_init_smc_table(struct amdgpu_device *adev) if (ret) return ret; - ci_save_default_power_profile(adev); - return 0; } @@ -6277,6 +6243,7 @@ static int ci_dpm_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->powerplay.pp_funcs = &ci_dpm_funcs; ci_dpm_set_irq_funcs(adev); return 0; @@ -6753,222 +6720,6 @@ static int ci_dpm_set_mclk_od(void *handle, uint32_t value) return 0; } -static int ci_dpm_get_power_profile_state(void *handle, - struct 
amd_pp_profile *query) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi || !query) - return -EINVAL; - - if (query->type == AMD_PP_GFX_PROFILE) - memcpy(query, &pi->gfx_power_profile, - sizeof(struct amd_pp_profile)); - else if (query->type == AMD_PP_COMPUTE_PROFILE) - memcpy(query, &pi->compute_power_profile, - sizeof(struct amd_pp_profile)); - else - return -EINVAL; - - return 0; -} - -static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev, - struct amd_pp_profile *request) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_dpm_table *dpm_table = &(pi->dpm_table); - struct SMU7_Discrete_GraphicsLevel *levels = - pi->smc_state_table.GraphicsLevel; - uint32_t array = pi->dpm_table_start + - offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); - uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) * - SMU7_MAX_LEVELS_GRAPHICS; - uint32_t i; - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - levels[i].ActivityLevel = - cpu_to_be16(request->activity_threshold); - levels[i].EnabledForActivity = 1; - levels[i].UpH = request->up_hyst; - levels[i].DownH = request->down_hyst; - } - - return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels, - array_size, pi->sram_end); -} - -static void ci_find_min_clock_masks(struct amdgpu_device *adev, - uint32_t *sclk_mask, uint32_t *mclk_mask, - uint32_t min_sclk, uint32_t min_mclk) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_dpm_table *dpm_table = &(pi->dpm_table); - uint32_t i; - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - if (dpm_table->sclk_table.dpm_levels[i].enabled && - dpm_table->sclk_table.dpm_levels[i].value >= min_sclk) - *sclk_mask |= 1 << i; - } - - for (i = 0; i < dpm_table->mclk_table.count; i++) { - if (dpm_table->mclk_table.dpm_levels[i].enabled && - dpm_table->mclk_table.dpm_levels[i].value >= min_mclk) - *mclk_mask |= 1 << i; - } -} - -static int ci_set_power_profile_state(struct amdgpu_device *adev, - struct amd_pp_profile *request) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int tmp_result, result = 0; - uint32_t sclk_mask = 0, mclk_mask = 0; - - tmp_result = ci_freeze_sclk_mclk_dpm(adev); - if (tmp_result) { - DRM_ERROR("Failed to freeze SCLK MCLK DPM!"); - result = tmp_result; - } - - tmp_result = ci_populate_requested_graphic_levels(adev, - request); - if (tmp_result) { - DRM_ERROR("Failed to populate requested graphic levels!"); - result = tmp_result; - } - - tmp_result = ci_unfreeze_sclk_mclk_dpm(adev); - if (tmp_result) { - DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!"); - result = tmp_result; - } - - ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask, - request->min_sclk, request->min_mclk); - - if (sclk_mask) { - if (!pi->sclk_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter( - adev, - PPSMC_MSG_SCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask. - sclk_dpm_enable_mask & - sclk_mask); - } - - if (mclk_mask) { - if (!pi->mclk_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter( - adev, - PPSMC_MSG_MCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask. 
- mclk_dpm_enable_mask & - mclk_mask); - } - - - return result; -} - -static int ci_dpm_set_power_profile_state(void *handle, - struct amd_pp_profile *request) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - int ret = -1; - - if (!pi || !request) - return -EINVAL; - - if (adev->pm.dpm.forced_level != - AMD_DPM_FORCED_LEVEL_AUTO) - return -EINVAL; - - if (request->min_sclk || - request->min_mclk || - request->activity_threshold || - request->up_hyst || - request->down_hyst) { - if (request->type == AMD_PP_GFX_PROFILE) - memcpy(&pi->gfx_power_profile, request, - sizeof(struct amd_pp_profile)); - else if (request->type == AMD_PP_COMPUTE_PROFILE) - memcpy(&pi->compute_power_profile, request, - sizeof(struct amd_pp_profile)); - else - return -EINVAL; - - if (request->type == pi->current_power_profile) - ret = ci_set_power_profile_state( - adev, - request); - } else { - /* set power profile if it exists */ - switch (request->type) { - case AMD_PP_GFX_PROFILE: - ret = ci_set_power_profile_state( - adev, - &pi->gfx_power_profile); - break; - case AMD_PP_COMPUTE_PROFILE: - ret = ci_set_power_profile_state( - adev, - &pi->compute_power_profile); - break; - default: - return -EINVAL; - } - } - - if (!ret) - pi->current_power_profile = request->type; - - return 0; -} - -static int ci_dpm_reset_power_profile_state(void *handle, - struct amd_pp_profile *request) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi || !request) - return -EINVAL; - - if (request->type == AMD_PP_GFX_PROFILE) { - pi->gfx_power_profile = pi->default_gfx_power_profile; - return ci_dpm_set_power_profile_state(adev, - &pi->gfx_power_profile); - } else if (request->type == AMD_PP_COMPUTE_PROFILE) { - pi->compute_power_profile = - pi->default_compute_power_profile; - return ci_dpm_set_power_profile_state(adev, - &pi->compute_power_profile); - } else - return -EINVAL; -} - -static int ci_dpm_switch_power_profile(void *handle, - enum amd_pp_profile_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct amd_pp_profile request = {0}; - - if (!pi) - return -EINVAL; - - if (pi->current_power_profile != type) { - request.type = type; - return ci_dpm_set_power_profile_state(adev, &request); - } - - return 0; -} - static int ci_dpm_read_sensor(void *handle, int idx, void *value, int *size) { @@ -7012,7 +6763,7 @@ static int ci_dpm_read_sensor(void *handle, int idx, } } -const struct amd_ip_funcs ci_dpm_ip_funcs = { +static const struct amd_ip_funcs ci_dpm_ip_funcs = { .name = "ci_dpm", .early_init = ci_dpm_early_init, .late_init = ci_dpm_late_init, @@ -7029,7 +6780,16 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = { .set_powergating_state = ci_dpm_set_powergating_state, }; -const struct amd_pm_funcs ci_dpm_funcs = { +const struct amdgpu_ip_block_version ci_smu_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &ci_dpm_ip_funcs, +}; + +static const struct amd_pm_funcs ci_dpm_funcs = { .pre_set_power_state = &ci_dpm_pre_set_power_state, .set_power_state = &ci_dpm_set_power_state, .post_set_power_state = &ci_dpm_post_set_power_state, @@ -7053,10 +6813,6 @@ const struct amd_pm_funcs ci_dpm_funcs = { .set_mclk_od = ci_dpm_set_mclk_od, .check_state_equal = ci_check_state_equal, .get_vce_clock_state = amdgpu_get_vce_clock_state, - .get_power_profile_state = ci_dpm_get_power_profile_state, 
- .set_power_profile_state = ci_dpm_set_power_profile_state, - .reset_power_profile_state = ci_dpm_reset_power_profile_state, - .switch_power_profile = ci_dpm_switch_power_profile, .read_sensor = ci_dpm_read_sensor, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h index 84cbc9c45f4d..91be2996ae7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h @@ -295,13 +295,6 @@ struct ci_power_info { bool fan_is_controlled_by_smc; u32 t_min; u32 fan_ctrl_default_mode; - - /* power profile */ - struct amd_pp_profile gfx_power_profile; - struct amd_pp_profile compute_power_profile; - struct amd_pp_profile default_gfx_power_profile; - struct amd_pp_profile default_compute_power_profile; - enum amd_pp_profile_type current_power_profile; }; #define CISLANDS_VOLTAGE_CONTROL_NONE 0x0 diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 4324184996a5..0df22030e713 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -67,7 +67,6 @@ #include "amdgpu_dm.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_powerplay.h" #include "dce_virtual.h" /* @@ -1887,10 +1886,6 @@ static int cik_common_early_init(void *handle) return -EINVAL; } - adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - - amdgpu_device_get_pcie_info(adev); - return 0; } @@ -2000,7 +1995,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + if (amdgpu_dpm == -1) + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2018,7 +2016,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + if (amdgpu_dpm == -1) + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2036,7 +2037,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2055,7 +2056,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h index 
c7b4349f6319..2a086610f74d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h @@ -24,8 +24,7 @@ #ifndef __CIK_DPM_H__ #define __CIK_DPM_H__ -extern const struct amd_ip_funcs ci_dpm_ip_funcs; -extern const struct amd_ip_funcs kv_dpm_ip_funcs; -extern const struct amd_pm_funcs ci_dpm_funcs; -extern const struct amd_pm_funcs kv_dpm_funcs; +extern const struct amdgpu_ip_block_version ci_smu_ip_block; +extern const struct amdgpu_ip_block_version kv_smu_ip_block; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 07c7852180d0..44d10c2172f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -111,7 +111,7 @@ static int cik_ih_irq_init(struct amdgpu_device *adev) cik_ih_disable_interrupts(adev); /* setup interrupt control */ - WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 69568cd1bb99..f48ea0dad875 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -310,7 +310,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); @@ -510,7 +510,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index cfd0ad03c938..960c29e17da6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -111,7 +111,7 @@ static int cz_ih_irq_init(struct amdgpu_device *adev) cz_ih_disable_interrupts(adev); /* setup interrupt control */ - WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 7ea900010702..452f88ea46a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2862,6 +2862,11 @@ static int dce_v10_0_hw_fini(void *handle) static int dce_v10_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v10_0_hw_fini(handle); } @@ -2870,6 +2875,9 @@ static int dce_v10_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v10_0_hw_init(handle); /* turn on 
the BL */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 158b92ea435f..a7c1c584a191 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2988,6 +2988,11 @@ static int dce_v11_0_hw_fini(void *handle) static int dce_v11_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v11_0_hw_fini(handle); } @@ -2996,6 +3001,9 @@ static int dce_v11_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v11_0_hw_init(handle); /* turn on the BL */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 03f19363f8f6..9f67b7fd3487 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2730,6 +2730,11 @@ static int dce_v6_0_hw_fini(void *handle) static int dce_v6_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v6_0_hw_fini(handle); } @@ -2738,6 +2743,9 @@ static int dce_v6_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v6_0_hw_init(handle); /* turn on the BL */ @@ -3037,7 +3045,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); schedule_work(&adev->hotplug_work); - DRM_INFO("IH: HPD%d\n", hpd + 1); + DRM_DEBUG("IH: HPD%d\n", hpd + 1); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 8dbe97dff58c..f55422cbd77a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2760,6 +2760,11 @@ static int dce_v8_0_hw_fini(void *handle) static int dce_v8_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v8_0_hw_fini(handle); } @@ -2768,6 +2773,9 @@ static int dce_v8_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v8_0_hw_init(handle); /* turn on the BL */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 972d421caada..e13d9d83767b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4358,34 +4358,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) case CHIP_KAVERI: adev->gfx.config.max_shader_engines = 1; adev->gfx.config.max_tile_pipes = 4; - if ((adev->pdev->device == 0x1304) || - (adev->pdev->device == 0x1305) || - (adev->pdev->device == 0x130C) || - (adev->pdev->device == 0x130F) || - (adev->pdev->device == 0x1310) || - (adev->pdev->device == 0x1311) || - (adev->pdev->device == 0x131C)) { - adev->gfx.config.max_cu_per_sh = 8; - adev->gfx.config.max_backends_per_se = 2; - } else if ((adev->pdev->device == 0x1309) || - (adev->pdev->device == 0x130A) || - 
(adev->pdev->device == 0x130D) || - (adev->pdev->device == 0x1313) || - (adev->pdev->device == 0x131D)) { - adev->gfx.config.max_cu_per_sh = 6; - adev->gfx.config.max_backends_per_se = 2; - } else if ((adev->pdev->device == 0x1306) || - (adev->pdev->device == 0x1307) || - (adev->pdev->device == 0x130B) || - (adev->pdev->device == 0x130E) || - (adev->pdev->device == 0x1315) || - (adev->pdev->device == 0x131B)) { - adev->gfx.config.max_cu_per_sh = 4; - adev->gfx.config.max_backends_per_se = 1; - } else { - adev->gfx.config.max_cu_per_sh = 3; - adev->gfx.config.max_backends_per_se = 1; - } + adev->gfx.config.max_cu_per_sh = 8; + adev->gfx.config.max_backends_per_se = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_texture_channel_caches = 4; adev->gfx.config.max_gprs = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 848008ef46b8..d1d2c27156b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -271,58 +271,65 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) { - struct amdgpu_device *adev = ring->adev; - struct amdgpu_ib ib; - struct dma_fence *f = NULL; - uint32_t scratch; - uint32_t tmp = 0; - long r; - - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); - goto err1; - } - ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); - ib.ptr[2] = 0xDEADBEEF; - ib.length_dw = 3; - - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); - if (r) - goto err2; - - r = dma_fence_wait_timeout(f, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); - r = -ETIMEDOUT; - goto err2; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - goto err2; - } - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); - r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + + unsigned index; + uint64_t gpu_addr; + uint32_t tmp; + long r; + + r = amdgpu_device_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 16, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; + } + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err2; + } + + tmp = adev->wb.wb[index]; + if (tmp == 0xDEADBEEF) { + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + 
DRM_ERROR("ib test on ring %d failed\n", ring->idx); + r = -EINVAL; + } + err2: - amdgpu_ib_free(adev, &ib, NULL); - dma_fence_put(f); + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); err1: - amdgpu_gfx_scratch_free(adev, scratch); - return r; + amdgpu_device_wb_free(adev, index); + return r; } @@ -1254,23 +1261,23 @@ static int gfx_v9_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; /* KIQ event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); if (r) return r; /* EOP Event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185, &adev->gfx.priv_inst_irq); if (r) return r; @@ -2954,7 +2961,13 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); if (amdgpu_sriov_vf(adev)) { - pr_debug("For SRIOV client, shouldn't do anything.\n"); + gfx_v9_0_cp_gfx_enable(adev, false); + /* must disable polling for SRIOV when hw finished, otherwise + * CPC engine may still keep fetching WB address which is already + * invalid after sw finished and trigger DMAR reading error in + * hypervisor side. + */ + WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); return 0; } gfx_v9_0_cp_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 94a07bcbbdda..acfbd2d749cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -92,9 +92,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program "protection fault". 
*/ WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, - (u32)((u64)adev->dummy_page.addr >> 44)); + (u32)((u64)adev->dummy_page_addr >> 44)); WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2, ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 2c0ed9dd0c91..5617cf62c566 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -533,7 +533,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT0_CNTL2, 0); WREG32(mmVM_CONTEXT0_CNTL, VM_CONTEXT0_CNTL__ENABLE_CONTEXT_MASK | @@ -563,7 +563,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) /* enable context1-15 */ WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT1_CNTL2, 4); WREG32(mmVM_CONTEXT1_CNTL, VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK | diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 4edd17059868..80054f36e487 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -644,7 +644,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT0_CNTL2, 0); tmp = RREG32(mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); @@ -674,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) /* enable context1-15 */ WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT1_CNTL2, 4); tmp = RREG32(mmVM_CONTEXT1_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1e0ad0657e96..d71d4cb68f9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -860,7 +860,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT0_CNTL2, 0); tmp = RREG32(mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); @@ -890,7 +890,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) /* enable context1-15 */ WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT1_CNTL2, 4); tmp = RREG32(mmVM_CONTEXT1_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); @@ -1105,7 +1105,6 @@ static 
int gmc_v8_0_sw_init(void *handle) */ adev->need_dma32 = false; dma_bits = adev->need_dma32 ? 32 : 40; - adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); if (r) { adev->need_dma32 = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index bc4bd5e7ac94..a70cbc45c4c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -673,7 +673,7 @@ static int gmc_v9_0_late_init(void *handle) for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) BUG_ON(vm_inv_eng[i] > 16); - if (adev->asic_type == CHIP_VEGA10) { + if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) { r = gmc_v9_0_ecc_available(adev); if (r == 1) { DRM_INFO("ECC is active.\n"); @@ -722,7 +722,10 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); if (!adev->gmc.vram_width) { /* hbm memory channel size */ - chansize = 128; + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; @@ -789,7 +792,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: /* all engines support GPUVM */ default: - adev->gmc.gart_size = 256ULL << 20; + adev->gmc.gart_size = 512ULL << 20; break; case CHIP_RAVEN: /* DCE SG support */ adev->gmc.gart_size = 1024ULL << 20; @@ -833,9 +836,9 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); + adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch (adev->asic_type) { case CHIP_RAVEN: - adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { @@ -846,8 +849,6 @@ static int gmc_v9_0_sw_init(void *handle) } break; case CHIP_VEGA10: - /* XXX Don't know how to get VRAM type yet. 
*/ - adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -860,9 +861,9 @@ static int gmc_v9_0_sw_init(void *handle) } /* This interrupt is VMC page fault.*/ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0, &adev->gmc.vm_fault); - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0, &adev->gmc.vm_fault); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 3237a576692d..842c4b677b4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -111,7 +111,7 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev) iceland_ih_disable_interrupts(adev); /* setup interrupt control */ - WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 8766681cfd3f..81babe026529 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -42,6 +42,8 @@ #define KV_MINIMUM_ENGINE_CLOCK 800 #define SMC_RAM_END 0x40000 +static const struct amd_pm_funcs kv_dpm_funcs; + static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev); static int kv_enable_nb_dpm(struct amdgpu_device *adev, bool enable); @@ -2960,6 +2962,7 @@ static int kv_dpm_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->powerplay.pp_funcs = &kv_dpm_funcs; kv_dpm_set_irq_funcs(adev); return 0; @@ -3301,7 +3304,7 @@ static int kv_dpm_read_sensor(void *handle, int idx, } } -const struct amd_ip_funcs kv_dpm_ip_funcs = { +static const struct amd_ip_funcs kv_dpm_ip_funcs = { .name = "kv_dpm", .early_init = kv_dpm_early_init, .late_init = kv_dpm_late_init, @@ -3318,7 +3321,16 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = { .set_powergating_state = kv_dpm_set_powergating_state, }; -const struct amd_pm_funcs kv_dpm_funcs = { +const struct amdgpu_ip_block_version kv_smu_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &kv_dpm_ip_funcs, +}; + +static const struct amd_pm_funcs kv_dpm_funcs = { .pre_set_power_state = &kv_dpm_pre_set_power_state, .set_power_state = &kv_dpm_set_power_state, .post_set_power_state = &kv_dpm_post_set_power_state, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index d0ade9fd9fa9..3dd5816495a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -103,9 +103,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program "protection fault". 
*/ WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, - (u32)(adev->dummy_page.addr >> 12)); + (u32)(adev->dummy_page_addr >> 12)); WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, - (u32)((u64)adev->dummy_page.addr >> 44)); + (u32)((u64)adev->dummy_page_addr >> 44)); tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 271452d3999a..8fb933c62cf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -33,56 +33,34 @@ static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev) { - u32 reg; - int timeout = AI_MAILBOX_TIMEDOUT; - u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID); - - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1); - WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL), reg); - - /*Wait for RCV_MSG_VALID to be 0*/ - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - while (reg & mask) { - if (timeout <= 0) { - pr_err("RCV_MSG_VALID is not cleared\n"); - break; - } - mdelay(1); - timeout -=1; - - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - } + WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2); } static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val) { - u32 reg; + WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0); +} - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, - TRN_MSG_VALID, val ? 1 : 0); - WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL), - reg); +/* + * this peek_msg could *only* be called in IRQ routine because in IRQ routine + * RCV_MSG_VALID field of BIF_BX_PF0_MAILBOX_CONTROL must already be set to 1 + * by host. + * + * if called not in IRQ routine, this peek_msg cannot be guaranteed to return the + * correct value since it doesn't return the RCV_DW0 under the case that + * RCV_MSG_VALID is set by host. 
+ */ +static enum idh_event xgpu_ai_mailbox_peek_msg(struct amdgpu_device *adev) +{ + return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0)); } + static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev, enum idh_event event) { u32 reg; - u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID); - - if (event != IDH_FLR_NOTIFICATION_CMPL) { - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - if (!(reg & mask)) - return -ENOENT; - } reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0)); @@ -94,54 +72,67 @@ static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev, return 0; } +static uint8_t xgpu_ai_peek_ack(struct amdgpu_device *adev) { + return RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2; +} + static int xgpu_ai_poll_ack(struct amdgpu_device *adev) { - int r = 0, timeout = AI_MAILBOX_TIMEDOUT; - u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK); - u32 reg; + int timeout = AI_MAILBOX_POLL_ACK_TIMEDOUT; + u8 reg; + + do { + reg = RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE); + if (reg & 2) + return 0; - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - while (!(reg & mask)) { - if (timeout <= 0) { - pr_err("Doesn't get ack from pf.\n"); - r = -ETIME; - break; - } mdelay(5); timeout -= 5; + } while (timeout > 1); - reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, - mmBIF_BX_PF0_MAILBOX_CONTROL)); - } + pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT); - return r; + return -ETIME; } static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) { - int r = 0, timeout = AI_MAILBOX_TIMEDOUT; - - r = xgpu_ai_mailbox_rcv_msg(adev, event); - while (r) { - if (timeout <= 0) { - pr_err("Doesn't get msg:%d from pf.\n", event); - r = -ETIME; - break; - } - mdelay(5); - timeout -= 5; + int r, timeout = AI_MAILBOX_POLL_MSG_TIMEDOUT; + do { r = xgpu_ai_mailbox_rcv_msg(adev, event); - } + if (!r) + return 0; - return r; + msleep(10); + timeout -= 10; + } while (timeout > 1); + + pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); + + return -ETIME; } static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev, enum idh_request req, u32 data1, u32 data2, u32 data3) { u32 reg; int r; + uint8_t trn; + + /* IMPORTANT: + * clear TRN_MSG_VALID valid to clear host's RCV_MSG_ACK + * and with host's RCV_MSG_ACK cleared hw automatically clear host's RCV_MSG_ACK + * which leads to VF's TRN_MSG_ACK cleared, otherwise below xgpu_ai_poll_ack() + * will return immediately + */ + do { + xgpu_ai_mailbox_set_valid(adev, false); + trn = xgpu_ai_peek_ack(adev); + if (trn) { + pr_err("trn=%x ACK should not assert! 
wait again !\n", trn); + msleep(1); + } + } while(trn); reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); @@ -245,15 +236,36 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) { struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); - - /* wait until RCV_MSG become 3 */ - if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { - pr_err("failed to recieve FLR_CMPL\n"); - return; - } - - /* Trigger recovery due to world switch failure */ - amdgpu_device_gpu_recover(adev, NULL, false); + int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT; + int locked; + + /* block amdgpu_gpu_recover till msg FLR COMPLETE received, + * otherwise the mailbox msg will be ruined/reset by + * the VF FLR. + * + * we can unlock the lock_reset to allow "amdgpu_job_timedout" + * to run gpu_recover() after FLR_NOTIFICATION_CMPL received + * which means host side had finished this VF's FLR. + */ + locked = mutex_trylock(&adev->lock_reset); + if (locked) + adev->in_gpu_reset = 1; + + do { + if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) + goto flr_done; + + msleep(10); + timeout -= 10; + } while (timeout > 1); + +flr_done: + if (locked) + mutex_unlock(&adev->lock_reset); + + /* Trigger recovery for world switch failure if no TDR */ + if (amdgpu_lockup_timeout == 0) + amdgpu_device_gpu_recover(adev, NULL, true); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -274,24 +286,22 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - int r; - - /* trigger gpu-reset by hypervisor only if TDR disbaled */ - if (!amdgpu_gpu_recovery) { - /* see what event we get */ - r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); - - /* sometimes the interrupt is delayed to inject to VM, so under such case - * the IDH_FLR_NOTIFICATION is overwritten by VF FLR from GIM side, thus - * above recieve message could be failed, we should schedule the flr_work - * anyway + enum idh_event event = xgpu_ai_mailbox_peek_msg(adev); + + switch (event) { + case IDH_FLR_NOTIFICATION: + if (amdgpu_sriov_runtime(adev)) + schedule_work(&adev->virt.flr_work); + break; + /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore + * it for now since that polling thread will handle it, + * other msg like flr complete is not handled here. 
*/ - if (r) { - DRM_ERROR("FLR_NOTIFICATION is missed\n"); - xgpu_ai_mailbox_send_ack(adev); - } - - schedule_work(&adev->virt.flr_work); + case IDH_CLR_MSG_BUF: + case IDH_FLR_NOTIFICATION_CMPL: + case IDH_READY_TO_ACCESS_GPU: + default: + break; } return 0; @@ -319,11 +329,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev) { int r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq); if (r) { amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 67e78576a9eb..b4a9ceea334b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -24,7 +24,9 @@ #ifndef __MXGPU_AI_H__ #define __MXGPU_AI_H__ -#define AI_MAILBOX_TIMEDOUT 12000 +#define AI_MAILBOX_POLL_ACK_TIMEDOUT 500 +#define AI_MAILBOX_POLL_MSG_TIMEDOUT 12000 +#define AI_MAILBOX_POLL_FLR_TIMEDOUT 500 enum idh_request { IDH_REQ_GPU_INIT_ACCESS = 1, @@ -51,4 +53,7 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev); int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev); void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev); +#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 +#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1 + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 2daeef6e9345..1cf34248dff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -133,7 +133,7 @@ static void nbio_v6_1_ih_control(struct amdgpu_device *adev) u32 interrupt_cntl; /* setup interrupt control */ - WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index cd10c76a76e2..df34dc79d444 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -208,7 +208,7 @@ static void nbio_v7_0_ih_control(struct amdgpu_device *adev) u32 interrupt_cntl; /* setup interrupt control */ - WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 6ccc9d43a7b8..6452101c7aab 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -339,7 +339,7 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - 
amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); @@ -484,7 +484,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 0c2b12ec0e9f..ecaef084dab1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -510,7 +510,7 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); @@ -711,14 +711,17 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); - if (ring->use_pollmem) + if (ring->use_pollmem) { + /*wptr polling is not fast enough, directly clear the wptr register */ + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, ENABLE, 1); - else + } else { wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, ENABLE, 0); + } WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); /* enable DMA RB */ @@ -750,7 +753,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 3d5385dda34c..9448c45d1b60 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -31,8 +31,6 @@ #include "sdma0/sdma0_4_0_sh_mask.h" #include "sdma1/sdma1_4_0_offset.h" #include "sdma1/sdma1_4_0_sh_mask.h" -#include "mmhub/mmhub_1_0_offset.h" -#include "mmhub/mmhub_1_0_sh_mask.h" #include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" @@ -238,31 +236,27 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { u32 lowbit, highbit; int me = (ring == &adev->sdma.instance[0].ring) ? 
0 : 1; - wptr = &local_wptr; lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = highbit; + wptr = wptr << 32; + wptr |= lowbit; } - return *wptr; + return wptr >> 2; } /** @@ -430,7 +424,7 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); @@ -672,7 +666,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, true); } @@ -1176,13 +1170,13 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA0, 224, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224, &adev->sdma.trap_irq); if (r) return r; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA1, 224, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224, &adev->sdma.trap_irq); if (r) return r; @@ -1337,7 +1331,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, { DRM_DEBUG("IH: SDMA trap\n"); switch (entry->client_id) { - case AMDGPU_IH_CLIENTID_SDMA0: + case SOC15_IH_CLIENTID_SDMA0: switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[0].ring); @@ -1353,7 +1347,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, break; } break; - case AMDGPU_IH_CLIENTID_SDMA1: + case SOC15_IH_CLIENTID_SDMA1: switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[1].ring); @@ -1403,7 +1397,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating( if (def != data) WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - if (adev->asic_type == CHIP_VEGA10) { + if (adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | @@ -1431,7 +1425,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating( if (def != data) WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - if (adev->asic_type == CHIP_VEGA10) { + if (adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | @@ -1462,7 +1456,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep( WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); /* 1-not override: enable sdma1 mem light sleep */ - if (adev->asic_type == CHIP_VEGA10) { + if (adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) @@ -1476,7 +1470,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep( WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); /* 0-override:disable sdma1 mem light sleep */ - if (adev->asic_type == CHIP_VEGA10) { + if 
(adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f20c4b7414e8..b154667a8fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -31,7 +31,8 @@ #include "amdgpu_uvd.h" #include "amdgpu_vce.h" #include "atom.h" -#include "amdgpu_powerplay.h" +#include "amd_pcie.h" +#include "si_dpm.h" #include "sid.h" #include "si_ih.h" #include "gfx_v6_0.h" @@ -1484,8 +1485,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) { struct pci_dev *root = adev->pdev->bus->self; int bridge_pos, gpu_pos; - u32 speed_cntl, mask, current_data_rate; - int ret, i; + u32 speed_cntl, current_data_rate; + int i; u16 tmp16; if (pci_is_root_bus(adev->pdev->bus)) @@ -1497,23 +1498,20 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return; - ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask); - if (ret != 0) - return; - - if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80))) + if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) return; speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >> LC_CURRENT_DATA_RATE_SHIFT; - if (mask & DRM_PCIE_SPEED_80) { + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate == 2) { DRM_INFO("PCIE gen 3 link speeds already enabled\n"); return; } DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n"); - } else if (mask & DRM_PCIE_SPEED_50) { + } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) { if (current_data_rate == 1) { DRM_INFO("PCIE gen 2 link speeds already enabled\n"); return; @@ -1529,7 +1527,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) if (!gpu_pos) return; - if (mask & DRM_PCIE_SPEED_80) { + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) { if (current_data_rate != 2) { u16 bridge_cfg, gpu_cfg; u16 bridge_cfg2, gpu_cfg2; @@ -1612,9 +1610,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16); tmp16 &= ~0xf; - if (mask & DRM_PCIE_SPEED_80) + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) tmp16 |= 3; - else if (mask & DRM_PCIE_SPEED_50) + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) tmp16 |= 2; else tmp16 |= 1; @@ -1985,7 +1983,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &si_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else @@ -1999,7 +1997,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &si_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else @@ -2013,7 +2011,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev) 
 		amdgpu_device_ip_block_add(adev, &si_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 		amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index acbf5afa4f38..b75d901ba3c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -121,7 +121,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
 		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
 
 		if (adev->mman.buffer_funcs_ring == ring)
-			amdgpu_ttm_set_active_vram_size(adev, adev->gmc.visible_vram_size);
+			amdgpu_ttm_set_buffer_funcs_status(adev, false);
 		ring->ready = false;
 	}
 }
@@ -184,7 +184,7 @@ static int si_dma_start(struct amdgpu_device *adev)
 		}
 
 		if (adev->mman.buffer_funcs_ring == ring)
-			amdgpu_ttm_set_active_vram_size(adev, adev->gmc.real_vram_size);
+			amdgpu_ttm_set_buffer_funcs_status(adev, true);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 8138053fcef1..3bfcf0d257ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -26,6 +26,7 @@
 #include "amdgpu_pm.h"
 #include "amdgpu_dpm.h"
 #include "amdgpu_atombios.h"
+#include "amd_pcie.h"
 #include "sid.h"
 #include "r600_dpm.h"
 #include "si_dpm.h"
@@ -66,6 +67,8 @@ MODULE_FIRMWARE("radeon/hainan_smc.bin");
 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
 
+static const struct amd_pm_funcs si_dpm_funcs;
+
 union power_info {
 	struct _ATOM_POWERPLAY_INFO info;
 	struct _ATOM_POWERPLAY_INFO_V2 info_2;
@@ -3331,29 +3334,6 @@ static void btc_apply_voltage_delta_rules(struct amdgpu_device *adev,
 	}
 }
 
-static enum amdgpu_pcie_gen r600_get_pcie_gen_support(struct amdgpu_device *adev,
-						       u32 sys_mask,
-						       enum amdgpu_pcie_gen asic_gen,
-						       enum amdgpu_pcie_gen default_gen)
-{
-	switch (asic_gen) {
-	case AMDGPU_PCIE_GEN1:
-		return AMDGPU_PCIE_GEN1;
-	case AMDGPU_PCIE_GEN2:
-		return AMDGPU_PCIE_GEN2;
-	case AMDGPU_PCIE_GEN3:
-		return AMDGPU_PCIE_GEN3;
-	default:
-		if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
-			return AMDGPU_PCIE_GEN3;
-		else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
-			return AMDGPU_PCIE_GEN2;
-		else
-			return AMDGPU_PCIE_GEN1;
-	}
-	return AMDGPU_PCIE_GEN1;
-}
-
 static void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
 				   u32 *p, u32 *u)
 {
@@ -5028,10 +5008,11 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 						    table->ACPIState.levels[0].vddc.index,
 						    &table->ACPIState.levels[0].std_vddc);
 	}
-	table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(adev,
-									    si_pi->sys_pcie_mask,
-									    si_pi->boot_pcie_gen,
-									    AMDGPU_PCIE_GEN1);
+	table->ACPIState.levels[0].gen2PCIE =
+		(u8)amdgpu_get_pcie_gen_support(adev,
+						si_pi->sys_pcie_mask,
+						si_pi->boot_pcie_gen,
+						AMDGPU_PCIE_GEN1);
 
 	if (si_pi->vddc_phase_shed_control)
 		si_populate_phase_shedding_value(adev,
@@ -7168,10 +7149,10 @@ static void si_parse_pplib_clock_info(struct amdgpu_device *adev,
 	pl->vddc = le16_to_cpu(clock_info->si.usVDDC);
 	pl->vddci = le16_to_cpu(clock_info->si.usVDDCI);
 	pl->flags = le32_to_cpu(clock_info->si.ulFlags);
-	pl->pcie_gen = r600_get_pcie_gen_support(adev,
-						 si_pi->sys_pcie_mask,
-						 si_pi->boot_pcie_gen,
-						 clock_info->si.ucPCIEGen);
+	pl->pcie_gen = amdgpu_get_pcie_gen_support(adev,
+						   si_pi->sys_pcie_mask,
+						   si_pi->boot_pcie_gen,
+						   clock_info->si.ucPCIEGen);
 
 	/* patch up vddc if necessary */
 	ret = si_get_leakage_voltage_from_leakage_index(adev, pl->vddc,
@@ -7326,7 +7307,6 @@ static int si_dpm_init(struct amdgpu_device *adev)
 	struct si_power_info *si_pi;
 	struct atom_clock_dividers dividers;
 	int ret;
-	u32 mask;
 
 	si_pi = kzalloc(sizeof(struct si_power_info), GFP_KERNEL);
 	if (si_pi == NULL)
@@ -7336,11 +7316,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
 	eg_pi = &ni_pi->eg;
 	pi = &eg_pi->rv7xx;
 
-	ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
-	if (ret)
-		si_pi->sys_pcie_mask = 0;
-	else
-		si_pi->sys_pcie_mask = mask;
+	si_pi->sys_pcie_mask =
+		(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
+		CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
 	si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
 	si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev);
 
@@ -7938,6 +7916,7 @@ static int si_dpm_early_init(void *handle)
 
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->powerplay.pp_funcs = &si_dpm_funcs;
 	si_dpm_set_irq_funcs(adev);
 	return 0;
 }
@@ -8038,7 +8017,7 @@ static int si_dpm_read_sensor(void *handle, int idx,
 	}
 }
 
-const struct amd_ip_funcs si_dpm_ip_funcs = {
+static const struct amd_ip_funcs si_dpm_ip_funcs = {
 	.name = "si_dpm",
 	.early_init = si_dpm_early_init,
 	.late_init = si_dpm_late_init,
@@ -8055,7 +8034,16 @@ const struct amd_ip_funcs si_dpm_ip_funcs = {
 	.set_powergating_state = si_dpm_set_powergating_state,
 };
 
-const struct amd_pm_funcs si_dpm_funcs = {
+const struct amdgpu_ip_block_version si_smu_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_SMC,
+	.major = 6,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &si_dpm_ip_funcs,
+};
+
+static const struct amd_pm_funcs si_dpm_funcs = {
 	.pre_set_power_state = &si_dpm_pre_set_power_state,
 	.set_power_state = &si_dpm_set_power_state,
 	.post_set_power_state = &si_dpm_post_set_power_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.h b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
index 9fe343de3477..6b7d292b919f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
@@ -245,8 +245,7 @@ enum si_display_gap
 	SI_PM_DISPLAY_GAP_IGNORE = 3,
 };
 
-extern const struct amd_ip_funcs si_dpm_ip_funcs;
-extern const struct amd_pm_funcs si_dpm_funcs;
+extern const struct amdgpu_ip_block_version si_smu_ip_block;
 
 struct ni_leakage_coeffients
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8dc8b72ed49b..c6e857325b58 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -57,7 +57,6 @@
 #include "uvd_v7_0.h"
 #include "vce_v4_0.h"
 #include "vcn_v1_0.h"
-#include "amdgpu_powerplay.h"
 #include "dce_virtual.h"
 #include "mxgpu_ai.h"
 
@@ -531,10 +530,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
-		if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1)
-			amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+		amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
 		if (!amdgpu_sriov_vf(adev))
-			amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -553,7 +551,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
 		amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -692,10 +690,6 @@ static int soc15_common_early_init(void *handle)
 		xgpu_ai_mailbox_set_irq_funcs(adev);
 	}
 
-	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
-	amdgpu_device_get_pcie_info(adev);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 18435389bae4..52853d8a8fdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -107,7 +107,7 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
 	tonga_ih_disable_interrupts(adev);
 
 	/* setup interrupt control */
-	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+	WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
 	interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
 	/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
 	 * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index a3e64e22c93c..f26f515db2fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -1580,7 +1580,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
 	.set_wptr = uvd_v6_0_enc_ring_set_wptr,
 	.emit_frame_size =
 		4 + /* uvd_v6_0_enc_ring_emit_pipeline_sync */
-		6 + /* uvd_v6_0_enc_ring_emit_vm_flush */
+		5 + /* uvd_v6_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* uvd_v6_0_enc_ring_emit_fence x2 vm fence */
 		1, /* uvd_v6_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* uvd_v6_0_enc_ring_emit_ib */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index e54cc3ca2303..eddc57f3b72a 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -390,13 +390,13 @@ static int uvd_v7_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* UVD TRAP */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
 	if (r)
 		return r;
 
 	/* UVD ENC TRAP */
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
 		if (r)
 			return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 2329b310ccf2..73fd48d6c756 100755
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -420,7 +420,7 @@ static int vce_v4_0_sw_init(void *handle)
 	unsigned size;
 	int r, i;
 
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index fdf4ac9313cf..8c132673bc79 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -75,13 +75,13 @@ static int vcn_v1_0_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* VCN DEC TRAP */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
 	if (r)
 		return r;
 
 	/* VCN ENC TRAP */
 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
-		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, i + 119,
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + 119,
 				      &adev->vcn.irq);
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cc8ce7e352a8..5ae5ed2e62d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -245,8 +245,8 @@ static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
 	 * some faults get cleared.
 	 */
 	switch (dw0 & 0xff) {
-	case AMDGPU_IH_CLIENTID_VMC:
-	case AMDGPU_IH_CLIENTID_UTCL2:
+	case SOC15_IH_CLIENTID_VMC:
+	case SOC15_IH_CLIENTID_UTCL2:
 		break;
 	default:
 		/* Not a VM fault */
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 61360a1552d8..e7fb165cc9db 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -71,7 +71,6 @@
 #include "uvd_v5_0.h"
 #include "uvd_v6_0.h"
 #include "vce_v3_0.h"
-#include "amdgpu_powerplay.h"
 #if defined(CONFIG_DRM_AMD_ACP)
 #include "amdgpu_acp.h"
 #endif
@@ -1097,11 +1096,6 @@ static int vi_common_early_init(void *handle)
 		xgpu_vi_mailbox_set_irq_funcs(adev);
 	}
 
-	/* vi use smc load by default */
-	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
-	amdgpu_device_get_pcie_info(adev);
-
 	return 0;
 }
 
@@ -1516,7 +1510,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block);
 		amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
@@ -1526,7 +1520,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block);
 		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1546,7 +1540,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1568,7 +1562,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
 		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1586,7 +1580,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -1607,7 +1601,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
 		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)