author     Dave Airlie <airlied@redhat.com>    2022-07-12 11:07:30 +1000
committer  Dave Airlie <airlied@redhat.com>    2022-07-12 11:07:32 +1000
commit     344feb7ccf764756937cfd74fa4ac5caba069c99 (patch)
tree       b86c7eb43878b310c31702094763d130b7e31dc5 /drivers/gpu/drm/amd/amdgpu
parent     c6a3d73592ae20f2f6306f823aa5121c83c88223 (diff)
parent     c5da61cf5bab30059f22ea368702c445ee87171a (diff)
Merge tag 'amd-drm-next-5.20-2022-07-05' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.20-2022-07-05:
amdgpu:
- Various spelling and grammar fixes
- Various eDP fixes
- Various DMCUB fixes
- VCN fixes
- GMC 11 fixes
- RAS fixes
- TMZ support for GC 10.3.7
- GPUVM TLB flush fixes
- SMU 13.0.x updates
- DCN 3.2 Support
- DCN 3.2.1 Support
- MES updates
- GFX11 modifiers support
- USB-C fixes
- MMHUB 3.0.1 support
- SDMA 6.0 doorbell fixes
- Initial devcoredump support
- Enable high priority gfx queue on ASICs that support it
- Enable GPU reset for SMU 13.0.4
- OLED display fixes
- MPO fixes
- DC frame size fixes
- ASPM support for PCIE 7.4/7.6
- GPU reset support for SMU 13.0.0
- GFX11 updates
- VCN JPEG fix
- BACO support for SMU 13.0.7
- VCN instance handling fix
- GFX8 GPUVM TLB flush fix
- GPU reset rework
- VCN 4.0.2 support
- GTT size fixes
- DP link training fixes
- LSDMA 6.0.1 support
- Various backlight fixes
- Color encoding fixes
- Backlight config cleanup
- VCN 4.x unified queue cleanup
amdkfd:
- MMU notifier fixes
- Updates for GC 10.3.6 and 10.3.7
- P2P DMA support using dma-buf
- Add available memory IOCTL
- SDMA 6.0.1 fix
- MES fixes
- HMM profiler support
radeon:
- License fix
- Backlight config cleanup
UAPI:
- Add available memory IOCTL to amdkfd
Proposed userspace: https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html
- HMM profiler support for amdkfd
Proposed userspace: https://lists.freedesktop.org/archives/amd-gfx/2022-June/080805.html
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220705212633.6037-1-alexander.deucher@amd.com
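The "Initial devcoredump support" item above lands in amdgpu_device.c (see the hunk further down), where the driver registers a lazy read callback via dev_coredumpm() and formats the dump through drm_coredump_printer(). As a point of reference, here is a minimal, hedged sketch of the same devcoredump pattern using the simpler dev_coredumpv() helper from that API, which hands ownership of a pre-formatted vmalloc() buffer to the devcoredump core; the driver and function names are illustrative and not part of this patch:

```c
// SPDX-License-Identifier: GPL-2.0
/*
 * Illustrative devcoredump capture (not the amdgpu code itself).
 * amdgpu uses the callback-based dev_coredumpm() so the text is built
 * on demand; this sketch pre-formats the dump and uses dev_coredumpv().
 */
#include <linux/devcoredump.h>
#include <linux/vmalloc.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/string.h>

static void mydrv_capture_coredump(struct device *dev, bool vram_lost)
{
	size_t len = 256;
	char *buf = vmalloc(len);

	if (!buf)
		return;

	snprintf(buf, len,
		 "**** Example Device Coredump ****\n"
		 "VRAM lost: %d\n", vram_lost);

	/*
	 * devcoredump now owns 'buf' (it is vfree()d internally) and exposes
	 * it under /sys/class/devcoredump/devcdN/data until read or expired.
	 */
	dev_coredumpv(dev, buf, strlen(buf), GFP_KERNEL);
}
```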
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
57 files changed, 2318 insertions, 921 deletions
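Two of the amdkfd changes in the diff below are easiest to follow as plain arithmetic: VRAM allocations are now charged against the KFD memory limit in 2 MiB granules (VRAM_ALLOCATION_ALIGN = 1 << 21), and the new amdgpu_amdkfd_get_available_memory() query subtracts pinned VRAM and the page-table reserve (ESTIMATE_PT_SIZE) before rounding down to the same granule. The following standalone sketch mirrors that accounting with made-up input values; it simplifies ESTIMATE_PT_SIZE() by omitting the AMDGPU_VM_RESERVED_VRAM floor the real macro enforces:

```c
#include <stdint.h>
#include <stdio.h>

#define VRAM_ALLOCATION_ALIGN   (1ULL << 21)             /* 2 MiB granule */
#define ALIGN_UP(x, a)          (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a)        ((x) & ~((a) - 1))
/* ~1/16384 of total memory reserved for page tables, as in the kernel macro
 * (minus the minimum-reserve clamp, omitted here for brevity). */
#define ESTIMATE_PT_SIZE(total) ((total) >> 14)

int main(void)
{
	/* Made-up example numbers, not taken from any real GPU. */
	uint64_t real_vram_size = 16ULL << 30;  /* 16 GiB VRAM              */
	uint64_t total_mem_size = 48ULL << 30;  /* system RAM + all VRAM    */
	uint64_t vram_used      = 3ULL << 30;   /* already charged to KFD   */
	uint64_t vram_pin_size  = 256ULL << 20; /* pinned (e.g. display)    */

	/* Each allocation is charged in whole 2 MiB granules to avoid
	 * fragmentation from 4 KiB tails... */
	uint64_t charged = ALIGN_UP(4096, VRAM_ALLOCATION_ALIGN);

	/* ...and the available-memory query rounds the remainder down. */
	uint64_t available = real_vram_size - vram_used - vram_pin_size -
			     ESTIMATE_PT_SIZE(total_mem_size);
	available = ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);

	printf("4 KiB request is charged as %llu bytes\n",
	       (unsigned long long)charged);
	printf("available VRAM reported to userspace: %llu bytes\n",
	       (unsigned long long)available);
	return 0;
}
```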
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 3e0e2eb7e235..a87e42c2c8dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -88,7 +88,8 @@ amdgpu-y += \ gmc_v8_0.o \ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \ - mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o + mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \ + mmhub_v3_0_1.o # add UMC block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 30ce6bb6fa77..fb9399a999ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -223,6 +223,9 @@ static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; static const bool __maybe_unused debug_evictions; /* = false */ static const bool __maybe_unused no_system_mem_limit; #endif +#ifdef CONFIG_HSA_AMD_P2P +extern bool pcie_p2p; +#endif extern int amdgpu_tmz; extern int amdgpu_reset_method; @@ -274,7 +277,7 @@ extern int amdgpu_vcnfw_log; #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 -/* smasrt shift bias level limits */ +/* smart shift bias level limits */ #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) @@ -667,6 +670,7 @@ enum amd_hw_ip_block_type { RSMU_HWIP, XGMI_HWIP, DCI_HWIP, + PCIE_HWIP, MAX_HWIP }; @@ -1044,10 +1048,18 @@ struct amdgpu_device { /* reset dump register */ uint32_t *reset_dump_reg_list; + uint32_t *reset_dump_reg_value; int num_regs; +#ifdef CONFIG_DEV_COREDUMP + struct amdgpu_task_info reset_task_info; + bool reset_vram_lost; + struct timespec64 reset_time; +#endif bool scpm_enabled; uint32_t scpm_status; + + struct work_struct reset_work; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1242,7 +1254,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); -int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 98ac53ee6bb5..130060834b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -66,9 +66,7 @@ struct amdgpu_atif { struct amdgpu_atif_notifications notifications; struct amdgpu_atif_functions functions; struct amdgpu_atif_notification_cfg notification_cfg; -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) struct backlight_device *bd; -#endif struct amdgpu_dm_backlight_caps backlight_caps; }; @@ -436,7 +434,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count); if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) { -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if (atif->bd) { DRM_DEBUG_DRIVER("Changing brightness to %d\n", req.backlight_level); @@ -447,7 +444,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, */ backlight_device_set_brightness(atif->bd, req.backlight_level); } 
-#endif } if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { @@ -849,7 +845,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) { struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if (atif->notifications.brightness_change) { if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) @@ -876,7 +871,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } } } -#endif adev->acpi_nb.notifier_call = amdgpu_acpi_event; register_acpi_notifier(&adev->acpi_nb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1f8161cd507f..567597469a8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -33,6 +33,7 @@ #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_ras.h" #include "amdgpu_umc.h" +#include "amdgpu_reset.h" /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, } } + +static void amdgpu_amdkfd_reset_work(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + kfd.reset_work); + + amdgpu_device_gpu_recover(adev, NULL); +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; @@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources); + + INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work); } } @@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) { if (amdgpu_device_should_recover_gpu(adev)) - amdgpu_device_gpu_recover(adev, NULL); + amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->kfd.reset_work); } int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, @@ -671,6 +684,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err_ib_sched; } + /* Drop the initial kref_init count (see drm_sched_main as example) */ + dma_fence_put(f); ret = dma_fence_wait(f, false); err_ib_sched: @@ -714,7 +729,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, { bool all_hub = false; - if (adev->family == AMDGPU_FAMILY_AI) + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) all_hub = true; return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f8b9f27adcf5..73bf8b5f2aa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -48,6 +48,7 @@ enum kfd_mem_attachment_type { KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */ KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */ KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */ + KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */ }; struct kfd_mem_attachment { @@ -96,6 +97,7 @@ struct amdgpu_kfd_dev { struct kfd_dev *dev; uint64_t vram_used; bool init_complete; + struct work_struct reset_work; }; enum kgd_engine_type { @@ -266,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void 
*drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, @@ -279,10 +282,11 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem, void **kptr, uint64_t *size); -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, + void **kptr, uint64_t *size); +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); + +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); @@ -332,7 +336,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) } #endif /* KGD2KFD callbacks */ -int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger); int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6b6d46e29e6e..2fcc6e079769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -32,12 +32,19 @@ #include "amdgpu_dma_buf.h" #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_xgmi.h" +#include "kfd_smi_events.h" /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +/* + * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB + * BO chunk + */ +#define VRAM_ALLOCATION_ALIGN (1 << 21) + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; @@ -108,7 +115,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) static size_t amdgpu_amdkfd_acc_size(uint64_t size) { @@ -148,7 +155,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { system_mem_needed = acc_size; ttm_mem_needed = acc_size; - vram_needed = size; + + /* + * Conservatively round up the allocation requirement to 2 MB + * to avoid fragmentation caused by 4K allocations in the tail + * 2M BO chunk. 
+ */ + vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size; @@ -173,7 +186,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - reserved_for_pt)) { + adev->gmc.real_vram_size - + atomic64_read(&adev->vram_pin_size) - + reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -205,7 +220,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; - adev->kfd.vram_used -= size; + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= acc_size; @@ -241,6 +256,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) kfree(bo->kfd_bo); } +/** + * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information + * about USERPTR or DOOREBELL or MMIO BO. + * @adev: Device for which dmamap BO is being created + * @mem: BO of peer device that is being DMA mapped. Provides parameters + * in building the dmamap BO + * @bo_out: Output parameter updated with handle of dmamap BO + */ +static int +create_dmamap_sg_bo(struct amdgpu_device *adev, + struct kgd_mem *mem, struct amdgpu_bo **bo_out) +{ + struct drm_gem_object *gem_obj; + int ret, align; + + ret = amdgpu_bo_reserve(mem->bo, false); + if (ret) + return ret; + + align = 1; + ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align, + AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE, + ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); + + amdgpu_bo_unreserve(mem->bo); + + if (ret) { + pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret); + return -EINVAL; + } + + *bo_out = gem_to_amdgpu_bo(gem_obj); + (*bo_out)->parent = amdgpu_bo_ref(mem->bo); + return ret; +} + /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's * reservation object. * @@ -446,6 +497,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) return pte_flags; } +/** + * create_sg_table() - Create an sg_table for a contiguous DMA addr range + * @addr: The starting address to point to + * @size: Size of memory area in bytes being pointed to + * + * Allocates an instance of sg_table and initializes it to point to memory + * area specified by input parameters. The address used to build is assumed + * to be DMA mapped, if needed. + * + * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table + * because they are physically contiguous. 
+ * + * Return: Initialized instance of SG Table or NULL + */ +static struct sg_table *create_sg_table(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg_dma_address(sg->sgl) = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int kfd_mem_dmamap_userptr(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -510,6 +593,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } +/** + * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device + * @attachment: Virtual address attachment of the BO on accessing device + * + * An access request from the device that owns DOORBELL does not require DMA mapping. + * This is because the request doesn't go through PCIe root complex i.e. it instead + * loops back. The need to DMA map arises only when accessing peer device's DOORBELL + * + * In contrast, all access requests for MMIO need to be DMA mapped without regard to + * device ownership. This is because access requests for MMIO go through PCIe root + * complex. + * + * This is accomplished in two steps: + * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used + * in updating requesting device's page table + * - Signal TTM to mark memory pointed to by requesting device's BO as GPU + * accessible. This allows an update of requesting device's page table + * with entries associated with DOOREBELL or MMIO memory + * + * This method is invoked in the following contexts: + * - Mapping of DOORBELL or MMIO BO of same or peer device + * - Validating an evicted DOOREBELL or MMIO BO on device seeking access + * + * Return: ZERO if successful, NON-ZERO otherwise + */ +static int +kfd_mem_dmamap_sg_bo(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *ttm = bo->tbo.ttm; + enum dma_data_direction dir; + dma_addr_t dma_addr; + bool mmio; + int ret; + + /* Expect SG Table of dmapmap BO to be NULL */ + mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP); + if (unlikely(ttm->sg)) { + pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio); + return -EINVAL; + } + + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + dma_addr = mem->bo->tbo.sg->sgl->dma_address; + pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length); + pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr); + dma_addr = dma_map_resource(adev->dev, dma_addr, + mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); + ret = dma_mapping_error(adev->dev, dma_addr); + if (unlikely(ret)) + return ret; + pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr); + + ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length); + if (unlikely(!ttm->sg)) { + ret = -ENOMEM; + goto unmap_sg; + } + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (unlikely(ret)) + goto free_sg; + + return ret; + +free_sg: + sg_free_table(ttm->sg); + kfree(ttm->sg); + ttm->sg = NULL; +unmap_sg: + dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length, + dir, DMA_ATTR_SKIP_CPU_SYNC); + return ret; +} + static int kfd_mem_dmamap_attachment(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -521,6 +685,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem, return kfd_mem_dmamap_userptr(mem, attachment); case KFD_MEM_ATT_DMABUF: return kfd_mem_dmamap_dmabuf(attachment); + case KFD_MEM_ATT_SG: + return kfd_mem_dmamap_sg_bo(mem, attachment); default: WARN_ON_ONCE(1); } @@ -561,6 +727,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } +/** + * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device + * @attachment: Virtual address attachment of the BO on accessing device + * + * The method performs following steps: + * - Signal TTM to mark memory pointed to by BO as GPU inaccessible + * - Free SG Table that is used to encapsulate DMA mapped memory of + * peer device's DOORBELL or MMIO memory + * + * This method is invoked in the following contexts: + * UNMapping of DOORBELL or MMIO BO on a device having access to its memory + * Eviction of DOOREBELL or MMIO BO on device having access to its memory + * + * Return: void + */ +static void +kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *ttm = bo->tbo.ttm; + enum dma_data_direction dir; + + if (unlikely(!ttm->sg)) { + pr_err("SG Table of BO is UNEXPECTEDLY NULL"); + return; + } + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address, + ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(ttm->sg); + kfree(ttm->sg); + ttm->sg = NULL; + bo->tbo.sg = NULL; +} + static void kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -574,39 +784,15 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, case KFD_MEM_ATT_DMABUF: kfd_mem_dmaunmap_dmabuf(attachment); break; + case KFD_MEM_ATT_SG: + kfd_mem_dmaunmap_sg_bo(mem, attachment); + break; default: WARN_ON_ONCE(1); } } static int -kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem, - struct amdgpu_bo **bo) -{ - unsigned long bo_size = mem->bo->tbo.base.size; - struct drm_gem_object *gobj; - int ret; - - ret = amdgpu_bo_reserve(mem->bo, false); - if (ret) - return ret; - - ret = amdgpu_gem_object_create(adev, bo_size, 1, - AMDGPU_GEM_DOMAIN_CPU, - AMDGPU_GEM_CREATE_PREEMPTIBLE, - ttm_bo_type_sg, mem->bo->tbo.base.resv, - &gobj); - amdgpu_bo_unreserve(mem->bo); - if (ret) - return ret; - - *bo = gem_to_amdgpu_bo(gobj); - (*bo)->parent = amdgpu_bo_ref(mem->bo); - - return 0; -} - -static int kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo **bo) { @@ -656,6 +842,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + bool same_hive = false; int i, ret; if (!va) { @@ -663,6 +850,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, return -EINVAL; } + /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices + * + * The access path of MMIO and DOORBELL BOs of is always over PCIe. + * In contrast the access path of VRAM BOs depens upon the type of + * link that connects the peer device. Access over PCIe is allowed + * if peer device has large BAR. 
In contrast, access over xGMI is + * allowed for both small and large BAR configurations of peer device + */ + if ((adev != bo_adev) && + ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { + if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) + same_hive = amdgpu_xgmi_same_hive(adev, bo_adev); + if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev)) + return -EINVAL; + } + for (i = 0; i <= is_aql; i++) { attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL); if (unlikely(!attachment[i])) { @@ -673,9 +878,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, va + bo_size, vm); - if (adev == bo_adev || - (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || - (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) { + if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the * local hive, or userptr mapping IOMMU direct map mode * share the original BO @@ -691,26 +896,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { /* Create an SG BO to DMA-map userptrs on other GPUs */ attachment[i]->type = KFD_MEM_ATT_USERPTR; - ret = kfd_mem_attach_userptr(adev, mem, &bo[i]); + ret = create_dmamap_sg_bo(adev, mem, &bo[i]); if (ret) goto unwind; - } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT && - mem->bo->tbo.type != ttm_bo_type_sg) { - /* GTT BOs use DMA-mapping ability of dynamic-attach - * DMA bufs. TODO: The same should work for VRAM on - * large-BAR GPUs. 
- */ + /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */ + } else if (mem->bo->tbo.type == ttm_bo_type_sg) { + WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL || + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP), + "Handing invalid SG BO in ATTACH request"); + attachment[i]->type = KFD_MEM_ATT_SG; + ret = create_dmamap_sg_bo(adev, mem, &bo[i]); + if (ret) + goto unwind; + /* Enable acces to GTT and VRAM BOs of peer devices */ + } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT || + mem->domain == AMDGPU_GEM_DOMAIN_VRAM) { attachment[i]->type = KFD_MEM_ATT_DMABUF; ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); if (ret) goto unwind; + pr_debug("Employ DMABUF mechanism to enable peer GPU access\n"); } else { - /* FIXME: Need to DMA-map other BO types: - * large-BAR VRAM, doorbells, MMIO remap - */ - attachment[i]->type = KFD_MEM_ATT_SHARED; - bo[i] = mem->bo; - drm_gem_object_get(&bo[i]->tbo.base); + WARN_ONCE(true, "Handling invalid ATTACH request"); + ret = -EINVAL; + goto unwind; } /* Add BO to VM internal data structures */ @@ -1111,24 +1320,6 @@ update_gpuvm_pte_failed: return ret; } -static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) -{ - struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); - - if (!sg) - return NULL; - if (sg_alloc_table(sg, 1, GFP_KERNEL)) { - kfree(sg); - return NULL; - } - sg->sgl->dma_address = addr; - sg->sgl->length = size; -#ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->sgl->dma_length = size; -#endif - return sg; -} - static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -1457,6 +1648,22 @@ out_unlock: return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available = adev->gmc.real_vram_size + - adev->kfd.vram_used + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + + return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN); +} + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, @@ -1497,7 +1704,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bo_type = ttm_bo_type_sg; if (size > UINT_MAX) return -EINVAL; - sg = create_doorbell_sg(*offset, size); + sg = create_sg_table(*offset, size); if (!sg) return -ENOMEM; } else { @@ -1907,8 +2114,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory( return ret; } -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem, void **kptr, uint64_t *size) +/** + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count + * @adev: Device to which allocated BO belongs + * @bo: Buffer object to be mapped + * + * Before return, bo reference count is incremented. To release the reference and unpin/ + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. + */ +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (ret) { + pr_err("Failed to pin bo. ret %d\n", ret); + goto err_pin_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + pr_err("Failed to bind bo to GART. 
ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, bo->kfd_bo->process_info->eviction_fence); + + amdgpu_bo_unreserve(bo); + + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unpin(bo); +err_pin_bo_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access + * + * @mem: Buffer object to be mapped for CPU access + * @kptr[out]: pointer in kernel CPU address space + * @size[out]: size of the buffer + * + * Pins the BO and maps it for kernel CPU access. The eviction fence is removed + * from the BO, since pinned BOs cannot be evicted. The bo must remain on the + * validate_list, so the GPU mapping can be restored after a page table was + * evicted. + * + * Return: 0 on success, error code on failure + */ +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, + void **kptr, uint64_t *size) { int ret; struct amdgpu_bo *bo = mem->bo; @@ -1959,8 +2227,15 @@ bo_reserve_failed: return ret; } -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem) +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access + * + * @mem: Buffer object to be unmapped for CPU access + * + * Removes the kernel CPU mapping and unpins the BO. It does not restore the + * eviction fence, so this function should only be used for cleanup before the + * BO is destroyed. + */ +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; @@ -2072,7 +2347,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->restore_userptr_work, @@ -2346,13 +2621,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) unlock_out: mutex_unlock(&process_info->lock); - mmput(mm); - put_task_struct(usertask); /* If validation failed, reschedule another attempt */ - if (evicted_bos) + if (evicted_bos) { schedule_delayed_work(&process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + + kfd_smi_event_queue_restore_rescheduled(mm); + } + mmput(mm); + put_task_struct(usertask); } /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 7dc92ef36b2b..2ef5296216d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -110,7 +110,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp, return -EACCES; } -static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio) +static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio) { switch (prio) { case AMDGPU_CTX_PRIORITY_HIGH: @@ -143,8 +143,9 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) ctx->init_priority : ctx->override_priority; switch (hw_ip) { + case AMDGPU_HW_IP_GFX: case AMDGPU_HW_IP_COMPUTE: - hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio); + hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio); break; case AMDGPU_HW_IP_VCE: case AMDGPU_HW_IP_VCN_ENC: @@ -779,7 +780,7 @@ static void 
amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, amdgpu_ctx_to_drm_sched_prio(priority)); /* set hw priority */ - if (hw_ip == AMDGPU_HW_IP_COMPUTE) { + if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) { hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip); hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX); scheds = adev->gpu_sched[hw_ip][hw_prio].sched; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index eedb12f6b8a3..f3ac7912c29c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1709,17 +1709,24 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, i++; } while (len < size); + new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto error_free; + } ret = down_write_killable(&adev->reset_domain->sem); if (ret) goto error_free; swap(adev->reset_dump_reg_list, tmp); + swap(adev->reset_dump_reg_value, new); adev->num_regs = i; up_write(&adev->reset_domain->sem); ret = size; error_free: kfree(tmp); + kfree(new); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 625424f3082b..64f37713b270 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,6 +32,9 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> +#include <linux/devcoredump.h> +#include <generated/utsrelease.h> +#include <linux/pci-p2pdma.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_probe_helper.h> @@ -1942,35 +1945,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_VERDE: - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_OLAND: - case CHIP_HAINAN: -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: -#endif - case CHIP_TOPAZ: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_VEGA20: - case CHIP_ALDEBARAN: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: default: return 0; case CHIP_VEGA10: @@ -3316,38 +3290,12 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_MULLINS: /* * We have systems in the wild with these ASICs that require - * LVDS and VGA support which is not supported with DC. + * VGA support which is not supported with DC. * * Fallback to the non-DC driver here by default so as not to * cause regressions. 
*/ return amdgpu_dc > 0; - case CHIP_HAWAII: - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: -#if defined(CONFIG_DRM_AMD_DC_DCN) - case CHIP_RAVEN: - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - case CHIP_RENOIR: - case CHIP_CYAN_SKILLFISH: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: -#endif default: return amdgpu_dc != 0; #else @@ -3369,7 +3317,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) */ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev) || + if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display || (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) return false; @@ -3667,14 +3615,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_mcbp) DRM_INFO("MCBP is enabled\n"); - if (adev->asic_type >= CHIP_NAVI10) { - if (amdgpu_mes || amdgpu_mes_kiq) - adev->enable_mes = true; - - if (amdgpu_mes_kiq) - adev->enable_mes_kiq = true; - } - /* * Reset domain needs to be present early, before XGMI hive discovered * (if any) and intitialized to use reset sem and in_gpu reset flag @@ -4666,6 +4606,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_virt_fini_data_exchange(adev); } + amdgpu_fence_driver_isr_toggle(adev, true); + /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -4681,6 +4623,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_fence_driver_force_completion(ring); } + amdgpu_fence_driver_isr_toggle(adev, false); + if (job && job->vm) drm_sched_increase_karma(&job->base); @@ -4721,20 +4665,73 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) { - uint32_t reg_value; int i; lockdep_assert_held(&adev->reset_domain->sem); dump_stack(); for (i = 0; i < adev->num_regs; i++) { - reg_value = RREG32(adev->reset_dump_reg_list[i]); - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value); + adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); + trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], + adev->reset_dump_reg_value[i]); } return 0; } +#ifdef CONFIG_DEV_COREDUMP +static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_device *adev = data; + struct drm_print_iterator iter; + int i; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec); + if (adev->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + adev->reset_task_info.process_name, + adev->reset_task_info.pid); + + if (adev->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (adev->num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < adev->num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + 
adev->reset_dump_reg_list[i], + adev->reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ +} + +static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev) +{ + struct drm_device *dev = adev_to_drm(adev); + + ktime_get_ts64(&adev->reset_time); + dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif + int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { @@ -4819,6 +4816,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, goto out; vram_lost = amdgpu_device_check_vram_lost(tmp_adev); +#ifdef CONFIG_DEV_COREDUMP + tmp_adev->reset_vram_lost = vram_lost; + memset(&tmp_adev->reset_task_info, 0, + sizeof(tmp_adev->reset_task_info)); + if (reset_context->job && reset_context->job->vm) + tmp_adev->reset_task_info = + reset_context->job->vm->task_info; + amdgpu_reset_capture_coredumpm(tmp_adev); +#endif if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); amdgpu_inc_vram_lost(tmp_adev); @@ -5004,16 +5010,32 @@ static void amdgpu_device_recheck_guilty_jobs( /* clear job's guilty and depend the folowing step to decide the real one */ drm_sched_reset_karma(s_job); - /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get - * to make sure fence is balanced */ - dma_fence_get(s_job->s_fence->parent); drm_sched_resubmit_jobs_ext(&ring->sched, 1); + if (!s_job->s_fence->parent) { + DRM_WARN("Failed to get a HW fence for job!"); + continue; + } + ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); if (ret == 0) { /* timeout */ DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n", ring->sched.name, s_job->id); + + amdgpu_fence_driver_isr_toggle(adev, true); + + /* Clear this failed job from fence array */ + amdgpu_fence_driver_clear_job_fences(ring); + + amdgpu_fence_driver_isr_toggle(adev, false); + + /* Since the job won't signal and we go for + * another resubmit drop this parent pointer + */ + dma_fence_put(s_job->s_fence->parent); + s_job->s_fence->parent = NULL; + /* set guilty */ drm_sched_increase_karma(s_job); retry: @@ -5042,7 +5064,6 @@ retry: /* got the hw fence, signal finished fence */ atomic_dec(ring->sched.score); - dma_fence_put(s_job->s_fence->parent); dma_fence_get(&s_job->s_fence->finished); dma_fence_signal(&s_job->s_fence->finished); dma_fence_put(&s_job->s_fence->finished); @@ -5055,8 +5076,29 @@ retry: } } +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + +#if defined(CONFIG_DEBUG_FS) + if (!amdgpu_sriov_vf(adev)) + cancel_work(&adev->reset_work); +#endif + + if (adev->kfd.dev) + cancel_work(&adev->kfd.reset_work); + + if (amdgpu_sriov_vf(adev)) + cancel_work(&adev->virt.flr_work); + + if (con && adev->ras_enabled) + cancel_work(&con->recovery_work); + +} + + /** - * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler + * amdgpu_device_gpu_recover - reset the asic and recover scheduler * * @adev: amdgpu_device pointer * @job: which job trigger hang @@ -5066,7 +5108,7 @@ retry: * Returns 0 for success or an error on failure. 
*/ -int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) { struct list_head device_list, *device_list_handle = NULL; @@ -5164,7 +5206,7 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); - drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true); /* disable ras on ALL IPs */ if (!need_emergency_restart && @@ -5194,8 +5236,8 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && job->base.s_fence->parent && - dma_fence_is_signaled(job->base.s_fence->parent)) { + if (job && (job->hw_fence.ops != NULL) && + dma_fence_is_signaled(&job->hw_fence)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -5210,6 +5252,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ r, adev_to_drm(tmp_adev)->unique); tmp_adev->asic_reset_res = r; } + + /* + * Drop all pending non scheduler resets. Scheduler resets + * were already dropped during drm_sched_stop + */ + amdgpu_device_stop_pending_resets(tmp_adev); } tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter)); @@ -5308,38 +5356,9 @@ skip_sched_resume: if (r) dev_info(adev->dev, "GPU reset end with ret = %d\n", r); - return r; -} - -struct amdgpu_recover_work_struct { - struct work_struct base; - struct amdgpu_device *adev; - struct amdgpu_job *job; - int ret; -}; - -static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work) -{ - struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base); - - recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job); -} -/* - * Serialize gpu recover into reset domain single threaded wq - */ -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job) -{ - struct amdgpu_recover_work_struct work = {.adev = adev, .job = job}; - - INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work); - - if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base)) - return -EAGAIN; - - flush_work(&work.base); - return work.ret; + atomic_set(&adev->reset_domain->reset_res, r); + return r; } /** @@ -5490,6 +5509,36 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } +/** + * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR + * + * @adev: amdgpu_device pointer + * @peer_adev: amdgpu_device pointer for peer device trying to access @adev + * + * Return true if @peer_adev can access (DMA) @adev through the PCIe + * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of + * @peer_adev. + */ +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ +#ifdef CONFIG_HSA_AMD_P2P + uint64_t address_mask = peer_adev->dev->dma_mask ? 
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); + resource_size_t aper_limit = + adev->gmc.aper_base + adev->gmc.aper_size - 1; + bool p2p_access = !(pci_p2pdma_distance_many(adev->pdev, + &peer_adev->dev, 1, true) < 0); + + return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && + adev->gmc.real_vram_size == adev->gmc.visible_vram_size && + !(adev->gmc.aper_base & address_mask || + aper_limit & address_mask)); +#else + return false; +#endif +} + int amdgpu_device_baco_enter(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 47f0344205ed..37234c2998d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -194,6 +194,7 @@ static int hw_id_map[MAX_HWIP] = { [UMC_HWIP] = UMC_HWID, [XGMI_HWIP] = XGMI_HWID, [DCI_HWIP] = DCI_HWID, + [PCIE_HWIP] = PCIE_HWID, }; static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary) @@ -1435,6 +1436,11 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) return -EINVAL; } + /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES + * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES + * but that may change in the future with new GPUs so keep this + * check for defensive purposes. + */ if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) { dev_err(adev->dev, "invalid vcn instances\n"); return -EINVAL; @@ -1450,6 +1456,9 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) switch (le16_to_cpu(vcn_info->v1.header.version_major)) { case 1: + /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES + * so this won't overflow. 
+ */ for (v = 0; v < adev->vcn.num_vcn_inst; v++) { adev->vcn.vcn_codec_disable_mask[v] = le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits); @@ -1709,6 +1718,8 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 1, 3): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): + case IP_VERSION(3, 2, 0): + case IP_VERSION(3, 2, 1): amdgpu_device_ip_block_add(adev, &dm_ip_block); break; default: @@ -1886,6 +1897,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); break; case IP_VERSION(4, 0, 0): + case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); @@ -2321,6 +2333,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) switch (adev->ip_versions[LSDMA_HWIP][0]) { case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): adev->lsdma.funcs = &lsdma_v6_0_funcs; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 17c9bbe0cbc5..97fff4727724 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -30,6 +30,9 @@ #include "atom.h" #include "amdgpu_connectors.h" #include "amdgpu_display.h" +#include "soc15_common.h" +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" #include <asm/div64.h> #include <linux/pci.h> @@ -663,6 +666,11 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) { struct amdgpu_device *adev = drm_to_adev(afb->base.dev); uint64_t modifier = 0; + int num_pipes = 0; + int num_pkrs = 0; + + num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; + num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes; if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) { modifier = DRM_FORMAT_MOD_LINEAR; @@ -675,7 +683,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) int bank_xor_bits = 0; int packers = 0; int rb = 0; - int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pipes = ilog2(num_pipes); uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B); switch (swizzle >> 2) { @@ -691,12 +699,17 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) case 6: /* 64 KiB _X */ block_size_bits = 16; break; + case 7: /* 256 KiB */ + block_size_bits = 18; + break; default: /* RESERVED or VAR */ return -EINVAL; } - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) + version = AMD_FMT_MOD_TILE_VER_GFX11; + else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10; @@ -707,19 +720,32 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) case 0: /* Z microtiling */ return -EINVAL; case 1: /* S microtiling */ - if (!has_xor) - version = AMD_FMT_MOD_TILE_VER_GFX9; + if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) { + if (!has_xor) + version = AMD_FMT_MOD_TILE_VER_GFX9; + } break; case 2: - if (!has_xor && afb->base.format->cpp[0] != 4) - version = AMD_FMT_MOD_TILE_VER_GFX9; + if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) { + if (!has_xor && afb->base.format->cpp[0] != 4) + version = 
AMD_FMT_MOD_TILE_VER_GFX9; + } break; case 3: break; } if (has_xor) { + if (num_pipes == num_pkrs && num_pkrs == 0) { + DRM_ERROR("invalid number of pipes and packers\n"); + return -EINVAL; + } + switch (version) { + case AMD_FMT_MOD_TILE_VER_GFX11: + pipe_xor_bits = min(block_size_bits - 8, pipes); + packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); + break; case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: pipe_xor_bits = min(block_size_bits - 8, pipes); packers = min(block_size_bits - 8 - pipe_xor_bits, @@ -753,9 +779,10 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) u64 render_dcc_offset; /* Enable constant encode on RAVEN2 and later. */ - bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN || + bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN || (adev->asic_type == CHIP_RAVEN && - adev->external_rev_id >= 0x81); + adev->external_rev_id >= 0x81)) && + adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0); int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B : dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B : @@ -870,10 +897,11 @@ static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned, return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12); } case AMD_FMT_MOD_TILE_VER_GFX10: - case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: { + case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: + case AMD_FMT_MOD_TILE_VER_GFX11: { int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); - if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 && + if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 && AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2) ++pipes_log2; @@ -966,6 +994,9 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) case DC_SW_64KB_S_X: block_size_log2 = 16; break; + case DC_SW_VAR_S_X: + block_size_log2 = 18; + break; default: drm_dbg_kms(rfb->base.dev, "Swizzle mode with unknown block size: %d\n", swizzle); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 7b6d83e2b13c..560352f7c317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -35,8 +35,6 @@ #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) -int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); void amdgpu_display_update_priority(struct amdgpu_device *adev); uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, uint64_t bo_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8890300766a5..e3d139708160 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -803,6 +803,16 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm #endif /** + * DOC: pcie_p2p (bool) + * Enable PCIe P2P (requires large-BAR). Default value: true (on) + */ +#ifdef CONFIG_HSA_AMD_P2P +bool pcie_p2p = true; +module_param(pcie_p2p, bool, 0444); +MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))"); +#endif + +/** * DOC: dcfeaturemask (uint) * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h. 
* The default is the current set of stable display features. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d16c8c1f72db..39597ab807d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -39,6 +39,7 @@ #include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_reset.h" /* * Fences @@ -163,11 +164,16 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd if (job && job->job_run_counter) { /* reinit seq for resubmitted jobs */ fence->seqno = seq; + /* TO be inline with external fence creation and other drivers */ + dma_fence_get(fence); } else { - if (job) + if (job) { dma_fence_init(fence, &amdgpu_job_fence_ops, &ring->fence_drv.lock, adev->fence_context + ring->idx, seq); + /* Against remove in amdgpu_job_{free, free_cb} */ + dma_fence_get(fence); + } else dma_fence_init(fence, &amdgpu_fence_ops, &ring->fence_drv.lock, @@ -531,6 +537,24 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) } } +/* Will either stop and flush handlers for amdgpu interrupt or reanble it */ +void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop) +{ + int i; + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src) + continue; + + if (stop) + disable_irq(adev->irq.irq); + else + enable_irq(adev->irq.irq); + } +} + void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev) { unsigned int i, j; @@ -594,8 +618,10 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { ptr = &ring->fence_drv.fences[i]; old = rcu_dereference_protected(*ptr, 1); - if (old && old->ops == &amdgpu_job_fence_ops) + if (old && old->ops == &amdgpu_job_fence_ops) { RCU_INIT_POINTER(*ptr, NULL); + dma_fence_put(old); + } } } @@ -798,7 +824,10 @@ static int gpu_recover_get(void *data, u64 *val) return 0; } - *val = amdgpu_device_gpu_recover(adev, NULL); + if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work)) + flush_work(&adev->reset_work); + + *val = atomic_read(&adev->reset_domain->reset_res); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -810,6 +839,14 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info); DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL, "%lld\n"); +static void amdgpu_debugfs_reset_work(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + reset_work); + + amdgpu_device_gpu_recover(adev, NULL); +} + #endif void amdgpu_debugfs_fence_init(struct amdgpu_device *adev) @@ -821,9 +858,12 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev) debugfs_create_file("amdgpu_fence_info", 0444, root, adev, &amdgpu_debugfs_fence_info_fops); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { + + INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work); debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev, &amdgpu_debugfs_gpu_recover_fops); + } #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 16699158e00d..222d3d7ea076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -142,7 +142,12 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s } } -static bool 
amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) +static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) +{ + return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1; +} + +static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) { if (amdgpu_compute_multipipe != -1) { DRM_INFO("amdgpu: forcing compute pipe policy %d\n", @@ -158,6 +163,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) return adev->gfx.mec.num_mec > 1; } +bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int queue = ring->queue; + int pipe = ring->pipe; + + /* Policy: use pipe1 queue0 as high priority graphics queue if we + * have more than one gfx pipe. + */ + if (amdgpu_gfx_is_graphics_multipipe_capable(adev) && + adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) { + int me = ring->me; + int bit; + + bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue); + if (ring == &adev->gfx.gfx_ring[bit]) + return true; + } + + return false; +} + bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -174,7 +201,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe; - bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); + bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev); int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe, adev->gfx.num_compute_rings); @@ -200,18 +227,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { - int i, queue, me; - - for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { - queue = i % adev->gfx.me.num_queue_per_pipe; - me = (i / adev->gfx.me.num_queue_per_pipe) - / adev->gfx.me.num_pipe_per_me; + int i, queue, pipe; + bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); + int max_queues_per_me = adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; - if (me >= adev->gfx.me.num_me) - break; + if (multipipe_policy) { /* policy: amdgpu owns the first queue per pipe at this stage * will extend to mulitple queues per pipe later */ - if (me == 0 && queue < 1) + for (i = 0; i < max_queues_per_me; i++) { + pipe = i % adev->gfx.me.num_pipe_per_me; + queue = (i / adev->gfx.me.num_pipe_per_me) % + adev->gfx.me.num_queue_per_pipe; + + set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, + adev->gfx.me.queue_bitmap); + } + } else { + for (i = 0; i < max_queues_per_me; ++i) set_bit(i, adev->gfx.me.queue_bitmap); } @@ -666,6 +699,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (amdgpu_device_skip_hw_access(adev)) return 0; + if (adev->mes.ring.sched.ready) + return amdgpu_mes_rreg(adev, reg); + BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); @@ -733,6 +769,11 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) if (amdgpu_device_skip_hw_access(adev)) return; + if (adev->mes.ring.sched.ready) { + amdgpu_mes_wreg(adev, reg, v); + return; + } + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 53526ffb2ce1..23a696d38390 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -396,6 +396,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); +bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring); int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 3df146579ad9..1d5af50331e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -242,7 +242,7 @@ restart_ih: * @entry: IV entry * * Decodes the interrupt vector at the current rptr - * position and also advance the position for for Vega10 + * position and also advance the position for Vega10 * and later GPUs. */ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h index 56cf127cdf93..484e936812e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h @@ -24,12 +24,18 @@ #ifndef __AMDGPU_IMU_H__ #define __AMDGPU_IMU_H__ +enum imu_work_mode { + DEBUG_MODE, + MISSION_MODE +}; + struct amdgpu_imu_funcs { int (*init_microcode)(struct amdgpu_device *adev); int (*load_microcode)(struct amdgpu_device *adev); void (*setup_imu)(struct amdgpu_device *adev); int (*start_imu)(struct amdgpu_device *adev); void (*program_rlc_ram)(struct amdgpu_device *adev); + int (*wait_for_reset_status)(struct amdgpu_device *adev); }; struct imu_rlc_ram_golden { @@ -46,6 +52,7 @@ struct imu_rlc_ram_golden { struct amdgpu_imu { const struct amdgpu_imu_funcs *funcs; + enum imu_work_mode mode; }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index b4cf8717f554..89011bae7588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -320,6 +320,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? 
*/ adev_to_drm(adev)->vblank_disable_immediate = true; r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 67f66f2f1809..22735790fe50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -64,7 +64,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ti.process_name, ti.tgid, ti.task_name, ti.pid); if (amdgpu_device_should_recover_gpu(ring->adev)) { - r = amdgpu_device_gpu_recover_imp(ring->adev, job); + r = amdgpu_device_gpu_recover(ring->adev, job); if (r) DRM_ERROR("GPU Recovery Failed: %d\n", r); } else { @@ -262,10 +262,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) DRM_ERROR("Error scheduling IBs (%d)\n", r); } - if (!job->job_run_counter) - dma_fence_get(fence); - else if (finished->error < 0) - dma_fence_put(&job->hw_fence); job->job_run_counter++; amdgpu_job_free_resources(job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 801f6fa692e9..6de63ea6687e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) atomic64_read(&adev->visible_pin_size), vram_gtt.vram_size); vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size; - vram_gtt.gtt_size *= PAGE_SIZE; vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size); return copy_to_user(out, &vram_gtt, min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0; @@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.cpu_accessible_vram.usable_heap_size * 3 / 4; mem.gtt.total_heap_size = gtt_man->size; - mem.gtt.total_heap_size *= PAGE_SIZE; mem.gtt.usable_heap_size = mem.gtt.total_heap_size - atomic64_read(&adev->gart_pin_size); mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 69a70a0aaed9..bffde4aa6fe7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -189,15 +189,29 @@ int amdgpu_mes_init(struct amdgpu_device *adev) r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); if (r) { + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); dev_err(adev->dev, "(%d) query_status_fence_offs wb alloc failed\n", r); - return r; + goto error_ids; } adev->mes.query_status_fence_gpu_addr = adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); adev->mes.query_status_fence_ptr = (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; + r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); + if (r) { + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + dev_err(adev->dev, + "(%d) read_val_offs alloc failed\n", r); + goto error_ids; + } + adev->mes.read_val_gpu_addr = + adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); + adev->mes.read_val_ptr = + (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs]; + r = amdgpu_mes_doorbell_init(adev); if (r) goto error; @@ -206,6 +220,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error: amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); error_ids: 
idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); @@ -218,6 +234,8 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); @@ -675,8 +693,10 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, queue_input.doorbell_offset = qprops->doorbell_off; queue_input.mqd_addr = queue->mqd_gpu_addr; queue_input.wptr_addr = qprops->wptr_gpu_addr; + queue_input.wptr_mc_addr = qprops->wptr_mc_addr; queue_input.queue_type = qprops->queue_type; queue_input.paging = qprops->paging; + queue_input.is_kfd_process = 0; r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); if (r) { @@ -792,6 +812,118 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, return r; } +uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) +{ + struct mes_misc_op_input op_input; + int r, val = 0; + + amdgpu_mes_lock(&adev->mes); + + op_input.op = MES_MISC_OP_READ_REG; + op_input.read_reg.reg_offset = reg; + op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes rreg is not supported!\n"); + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to read reg (0x%x)\n", reg); + else + val = *(adev->mes.read_val_ptr); + +error: + amdgpu_mes_unlock(&adev->mes); + return val; +} + +int amdgpu_mes_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t val) +{ + struct mes_misc_op_input op_input; + int r; + + amdgpu_mes_lock(&adev->mes); + + op_input.op = MES_MISC_OP_WRITE_REG; + op_input.write_reg.reg_offset = reg; + op_input.write_reg.reg_value = val; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes wreg is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to write reg (0x%x)\n", reg); + +error: + amdgpu_mes_unlock(&adev->mes); + return r; +} + +int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + struct mes_misc_op_input op_input; + int r; + + amdgpu_mes_lock(&adev->mes); + + op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; + op_input.wrm_reg.reg0 = reg0; + op_input.wrm_reg.reg1 = reg1; + op_input.wrm_reg.ref = ref; + op_input.wrm_reg.mask = mask; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes reg_write_reg_wait is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to reg_write_reg_wait\n"); + +error: + amdgpu_mes_unlock(&adev->mes); + return r; +} + +int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, + uint32_t val, uint32_t mask) +{ + struct mes_misc_op_input op_input; + int r; + + amdgpu_mes_lock(&adev->mes); + + op_input.op = MES_MISC_OP_WRM_REG_WAIT; + op_input.wrm_reg.reg0 = reg; + op_input.wrm_reg.ref = val; + op_input.wrm_reg.mask = mask; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes reg wait is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to reg_write_reg_wait\n"); + +error: + amdgpu_mes_unlock(&adev->mes); + return r; +} + static void amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, struct amdgpu_ring *ring, @@ -801,6 
+933,8 @@ amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, props->hqd_base_gpu_addr = ring->gpu_addr; props->rptr_gpu_addr = ring->rptr_gpu_addr; props->wptr_gpu_addr = ring->wptr_gpu_addr; + props->wptr_mc_addr = + ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; props->queue_size = ring->ring_size; props->eop_gpu_addr = ring->eop_gpu_addr; props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; @@ -961,7 +1095,8 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, r = amdgpu_bo_create_kernel(adev, sizeof(struct amdgpu_mes_ctx_meta_data), PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &ctx_data->meta_data_obj, NULL, + &ctx_data->meta_data_obj, + &ctx_data->meta_data_mc_addr, &ctx_data->meta_data_ptr); if (!ctx_data->meta_data_obj) return -ENOMEM; @@ -975,7 +1110,9 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) { if (ctx_data->meta_data_obj) - amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, NULL, NULL); + amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, + &ctx_data->meta_data_mc_addr, + &ctx_data->meta_data_ptr); } int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 25590b301f25..3cec87e023b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -33,6 +33,13 @@ #define AMDGPU_MES_MAX_GFX_PIPES 2 #define AMDGPU_MES_MAX_SDMA_PIPES 2 +#define AMDGPU_MES_API_VERSION_SHIFT 12 +#define AMDGPU_MES_FEAT_VERSION_SHIFT 24 + +#define AMDGPU_MES_VERSION_MASK 0x00000fff +#define AMDGPU_MES_API_VERSION_MASK 0x00fff000 +#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 + enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1, @@ -65,6 +72,9 @@ struct amdgpu_mes { spinlock_t queue_id_lock; + uint32_t sched_version; + uint32_t kiq_version; + uint32_t total_max_queue; uint32_t doorbell_id_offset; uint32_t max_doorbell_slices; @@ -109,6 +119,10 @@ struct amdgpu_mes { uint32_t query_status_fence_offs; uint64_t query_status_fence_gpu_addr; uint64_t *query_status_fence_ptr; + uint32_t read_val_offs; + uint64_t read_val_gpu_addr; + uint32_t *read_val_ptr; + uint32_t saved_flags; /* initialize kiq pipe */ @@ -166,6 +180,7 @@ struct amdgpu_mes_queue_properties { uint64_t hqd_base_gpu_addr; uint64_t rptr_gpu_addr; uint64_t wptr_gpu_addr; + uint64_t wptr_mc_addr; uint32_t queue_size; uint64_t eop_gpu_addr; uint32_t hqd_pipe_priority; @@ -198,12 +213,14 @@ struct mes_add_queue_input { uint32_t doorbell_offset; uint64_t mqd_addr; uint64_t wptr_addr; + uint64_t wptr_mc_addr; uint32_t queue_type; uint32_t paging; uint32_t gws_base; uint32_t gws_size; uint64_t tba_addr; uint64_t tma_addr; + uint32_t is_kfd_process; }; struct mes_remove_queue_input { @@ -233,6 +250,36 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; +enum mes_misc_opcode { + MES_MISC_OP_WRITE_REG, + MES_MISC_OP_READ_REG, + MES_MISC_OP_WRM_REG_WAIT, + MES_MISC_OP_WRM_REG_WR_WAIT, +}; + +struct mes_misc_op_input { + enum mes_misc_opcode op; + + union { + struct { + uint32_t reg_offset; + uint64_t buffer_addr; + } read_reg; + + struct { + uint32_t reg_offset; + uint32_t reg_value; + } write_reg; + + struct { + uint32_t ref; + uint32_t mask; + uint32_t reg0; + uint32_t reg1; + } wrm_reg; + }; +}; + struct amdgpu_mes_funcs { int (*add_hw_queue)(struct amdgpu_mes *mes, struct mes_add_queue_input *input); @@ -248,6 +295,9 @@ struct 
amdgpu_mes_funcs { int (*resume_gang)(struct amdgpu_mes *mes, struct mes_resume_gang_input *input); + + int (*misc_op)(struct amdgpu_mes *mes, + struct mes_misc_op_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -280,6 +330,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq); +uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); +int amdgpu_mes_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t val); +int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, + uint32_t val, uint32_t mask); +int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask); + int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, struct amdgpu_mes_ctx_data *ctx_data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h index c000f656aae5..912a5be2ece6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h @@ -107,6 +107,7 @@ struct amdgpu_mes_ctx_meta_data { struct amdgpu_mes_ctx_data { struct amdgpu_bo *meta_data_obj; uint64_t meta_data_gpu_addr; + uint64_t meta_data_mc_addr; struct amdgpu_bo_va *meta_data_va; void *meta_data_ptr; uint32_t gang_ids[AMDGPU_HW_IP_DMA+1]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 450d32ccd69d..d788a00043a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -350,15 +350,11 @@ struct amdgpu_mode_info { #define AMDGPU_MAX_BL_LEVEL 0xFF -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - struct amdgpu_backlight_privdata { struct amdgpu_encoder *encoder; uint8_t negative; }; -#endif - struct amdgpu_atom_ss { uint16_t percentage; uint16_t percentage_divider; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index dac202ae864d..285534bfc084 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -35,6 +35,8 @@ #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "atom.h" +#include "amdgpu_reset.h" + #ifdef CONFIG_X86_MCE_AMD #include <asm/mce.h> @@ -2946,7 +2948,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) - schedule_work(&ras->recovery_work); + amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b9a6fac2b8b2..bf5a95104ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -328,10 +328,16 @@ struct ecc_info_per_ch { uint16_t ce_count_hi_chip; uint64_t mca_umc_status; uint64_t mca_umc_addr; + uint64_t mca_ceumc_addr; }; struct umc_ecc_info { struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM]; + + /* Determine smu ecctable whether support + * record correctable error address + */ + int record_ce_addr_supported; }; struct amdgpu_ras { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index c80af0889773..32c86a0b145c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -132,6 +132,7 
@@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d } atomic_set(&reset_domain->in_gpu_reset, 0); + atomic_set(&reset_domain->reset_res, 0); init_rwsem(&reset_domain->sem); return reset_domain; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 1949dbe28a86..9e55a5d7a825 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -82,6 +82,7 @@ struct amdgpu_reset_domain { enum amdgpu_reset_domain_type type; struct rw_semaphore sem; atomic_t in_gpu_reset; + atomic_t reset_res; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 13db99d653bd..d3558c34d406 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -543,12 +543,12 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, */ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { - prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; - prop->hqd_queue_priority = - AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; - } + if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && + amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) || + (ring->funcs->type == AMDGPU_RING_TYPE_GFX && + amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) { + prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; + prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7d89a52091c0..82c178a9033a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -143,6 +143,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); +void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop); /* * Rings. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index be6f76a30ac6..3b4c19412625 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1798,18 +1798,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); - /* Compute GTT size, either bsaed on 3/4th the size of RAM size + /* Compute GTT size, either based on 1/2 the size of RAM size * or whatever the user passed on module init */ if (amdgpu_gtt_size == -1) { struct sysinfo si; si_meminfo(&si); - gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->gmc.mc_vram_size), - ((uint64_t)si.totalram * si.mem_unit * 3/4)); - } - else + /* Certain GL unit tests for large textures can cause problems + * with the OOM killer since there is no way to link this memory + * to a process. This was originally mitigated (but not necessarily + * eliminated) by limiting the GTT size. The problem is this limit + * is often too low for many modern games so just make the limit 1/2 + * of system memory which aligns with TTM. The OOM accounting needs + * to be addressed, but we shouldn't prevent common 3D applications + * from being usable just to potentially mitigate that corner case. 
+ */ + gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + (u64)si.totalram * si.mem_unit / 2); + } else { gtt_size = (uint64_t)amdgpu_gtt_size << 20; + } /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index ffa4c0d207db..c312577df596 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -486,26 +486,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_POLARIS12: case CHIP_VEGAM: return AMDGPU_FW_LOAD_SMU; - case CHIP_VEGA10: - case CHIP_RAVEN: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_ARCTURUS: - case CHIP_RENOIR: - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_ALDEBARAN: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - if (!load_type) - return AMDGPU_FW_LOAD_DIRECT; - else - return AMDGPU_FW_LOAD_PSP; case CHIP_CYAN_SKILLFISH: if (!(load_type && adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index aa7acfabf360..1bfdfb9207ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -329,6 +329,18 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } +/* from vcn4 and above, only unified queue is used */ +static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool ret = false; + + if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) + ret = true; + + return ret; +} + bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; @@ -718,19 +730,55 @@ error: return r; } +static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib, + uint32_t ib_pack_in_dw, bool enc) +{ + uint32_t *ib_checksum; + + ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */ + ib->ptr[ib->length_dw++] = 0x30000002; + ib_checksum = &ib->ptr[ib->length_dw++]; + ib->ptr[ib->length_dw++] = ib_pack_in_dw; + + ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */ + ib->ptr[ib->length_dw++] = 0x30000001; + ib->ptr[ib->length_dw++] = enc ? 
0x2 : 0x3; + ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t); + + return ib_checksum; +} + +static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum, + uint32_t ib_pack_in_dw) +{ + uint32_t i; + uint32_t checksum = 0; + + for (i = 0; i < ib_pack_in_dw; i++) + checksum += *(*ib_checksum + 2 + i); + + **ib_checksum = checksum; +} + static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, struct dma_fence **fence) { struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; - const unsigned int ib_size_dw = 64; + unsigned int ib_size_dw = 64; struct amdgpu_device *adev = ring->adev; struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); + bool sq = amdgpu_vcn_using_unified_queue(ring); + uint32_t *ib_checksum; + uint32_t ib_pack_in_dw; int i, r; + if (sq) + ib_size_dw += 8; + r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -739,6 +787,13 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib = &job->ibs[0]; ib->length_dw = 0; + /* single queue headers */ + if (sq) { + ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + + 4 + 2; /* engine info + decoding ib in dw */ + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); + } + ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8; ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); @@ -752,6 +807,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err_free; @@ -838,13 +896,18 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct amdgpu_ib *ib_msg, struct dma_fence **fence) { - const unsigned ib_size_dw = 16; + unsigned int ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; uint64_t addr; + bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; + if (sq) + ib_size_dw += 8; + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -854,6 +917,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); ib->length_dw = 0; + + if (sq) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; @@ -873,6 +940,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -892,13 +962,18 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han struct amdgpu_ib *ib_msg, struct dma_fence **fence) { - const unsigned ib_size_dw = 16; + unsigned int ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; uint64_t addr; + bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; + if (sq) + ib_size_dw += 8; + r = 
amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -908,6 +983,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); ib->length_dw = 0; + + if (sq) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; @@ -927,6 +1006,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -977,6 +1059,20 @@ error: return r; } +int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + long r; + + r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); + if (r) + goto error; + + r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout); + +error: + return r; +} + enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) { switch(ring) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6f90fcee0f9c..60c608144480 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -364,6 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a8ecf04389b3..9be57389301b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -76,6 +76,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; + if (adev->mes.ring.sched.ready) { + amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, + ref, mask); + return; + } + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index dc76d2b3ce52..8530befb2051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -54,7 +54,7 @@ * (uncached system pages). * Each VM has an ID associated with it and there is a page table * associated with each VMID. When executing a command buffer, - * the kernel tells the the ring what VMID to use for that command + * the kernel tells the ring what VMID to use for that command * buffer. VMIDs are allocated dynamically as commands are submitted. 
* The userspace drivers maintain their own address space and the kernel * sets up their pages tables accordingly when they submit their diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index d4f5a584075d..fa7421afb9a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -118,8 +118,6 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode } } -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd) { u8 level; @@ -251,18 +249,6 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder) } } -#else /* !CONFIG_BACKLIGHT_CLASS_DEVICE */ - -void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *encoder) -{ -} - -void amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *encoder) -{ -} - -#endif - bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h index f3852b59b1d6..a8b29d33c464 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h @@ -39,7 +39,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] = 0x00000000, // DB_DEPTH_CLEAR 0x00000000, // PA_SC_SCREEN_SCISSOR_TL 0x40004000, // PA_SC_SCREEN_SCISSOR_BR - 0x00000000, // DB_DFSM_CONTROL + 0, // HOLE 0x00000000, // DB_RESERVED_REG_2 0x00000000, // DB_Z_INFO 0x00000000, // DB_STENCIL_INFO @@ -50,7 +50,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] = 0x00000000, // DB_RESERVED_REG_1 0x00000000, // DB_RESERVED_REG_3 0x00000000, // DB_SPI_VRS_CENTER_LOCATION - 0x00000000, // DB_VRS_OVERRIDE_CNTL + 0, // HOLE 0x00000000, // DB_Z_READ_BASE_HI 0x00000000, // DB_STENCIL_READ_BASE_HI 0x00000000, // DB_Z_WRITE_BASE_HI @@ -270,29 +270,29 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] = 0x00000000, // PA_SC_FSR_EN 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y - 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_VIEW + 0, // HOLE 0x00000000, // PA_SC_VRS_OVERRIDE_CNTL 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY 0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL - 0, // HOLE + 0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL 0, // HOLE 0, // HOLE 0x00000000, // PA_SC_VRS_RATE_BASE 0x00000000, // PA_SC_VRS_RATE_BASE_EXT 0x00000000, // PA_SC_VRS_RATE_SIZE_XY - 0x00000000, // PA_SC_VRS_RATE_VIEW - 0xffffffff, // VGT_MAX_VTX_INDX - 0x00000000, // VGT_MIN_VTX_INDX - 0x00000000, // VGT_INDX_OFFSET + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX 0x00550055, // CB_RMI_GL2_CACHE_CONTROL 0x00000000, // CB_BLEND_RED 0x00000000, // CB_BLEND_GREEN 0x00000000, // CB_BLEND_BLUE 0x00000000, // CB_BLEND_ALPHA - 0x00000000, // CB_DCC_CONTROL + 0x00000000, // CB_FDCC_CONTROL 0x00000000, // CB_COVERAGE_OUT_CONTROL 0x00000000, // DB_STENCIL_CONTROL 0x01000000, // DB_STENCILREFMASK @@ -470,8 +470,8 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] = 0x00000000, // SPI_BARYC_CNTL 0, // HOLE 0x00000000, // SPI_TMPRING_SIZE - 0, // HOLE - 0, // HOLE + 0x00000000, // SPI_GFX_SCRATCH_BASE_LO + 0x00000000, // SPI_GFX_SCRATCH_BASE_HI 0, // HOLE 0, // 
HOLE 0, // HOLE @@ -545,7 +545,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] = 0x00000000, // PA_STEREO_CNTL 0x00000000, // PA_STATE_STEREO_X 0x00000000, // PA_CL_VRS_CNTL - 0x00000000, // PA_SIDEBAND_REQUEST_DELAYS + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE @@ -658,30 +658,30 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] = 0x00000000, // PA_SU_POINT_MINMAX 0x00000000, // PA_SU_LINE_CNTL 0x00000000, // PA_SC_LINE_STIPPLE - 0x00000000, // VGT_OUTPUT_PATH_CNTL - 0x00000000, // VGT_HOS_CNTL + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_HOS_MAX_TESS_LEVEL 0x00000000, // VGT_HOS_MIN_TESS_LEVEL - 0x00000000, // VGT_HOS_REUSE_DEPTH - 0x00000000, // VGT_GROUP_PRIM_TYPE - 0x00000000, // VGT_GROUP_FIRST_DECR - 0x00000000, // VGT_GROUP_DECR - 0x00000000, // VGT_GROUP_VECT_0_CNTL - 0x00000000, // VGT_GROUP_VECT_1_CNTL - 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL - 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL - 0x00000000, // VGT_GS_MODE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_GS_ONCHIP_CNTL 0x00000000, // PA_SC_MODE_CNTL_0 0x00000000, // PA_SC_MODE_CNTL_1 0x00000000, // VGT_ENHANCE - 0x00000100, // VGT_GS_PER_ES - 0x00000080, // VGT_ES_PER_GS - 0x00000002, // VGT_GS_PER_VS - 0x00000000, // VGT_GSVS_RING_OFFSET_1 - 0x00000000, // VGT_GSVS_RING_OFFSET_2 - 0x00000000, // VGT_GSVS_RING_OFFSET_3 - 0x00000000, // VGT_GS_OUT_PRIM_TYPE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // IA_ENHANCE }; static const unsigned int gfx11_SECT_CONTEXT_def_5[] = @@ -695,37 +695,36 @@ static const unsigned int gfx11_SECT_CONTEXT_def_6[] = }; static const unsigned int gfx11_SECT_CONTEXT_def_7[] = { - 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN 0x00000000, // VGT_DRAW_PAYLOAD_CNTL 0, // HOLE - 0x00000000, // VGT_INSTANCE_STEP_RATE_0 - 0x00000000, // VGT_INSTANCE_STEP_RATE_1 - 0x000000ff, // IA_MULTI_VGT_PARAM + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_ESGS_RING_ITEMSIZE - 0x00000000, // VGT_GSVS_RING_ITEMSIZE + 0, // HOLE 0x00000000, // VGT_REUSE_OFF - 0x00000000, // VGT_VTX_CNT_EN + 0, // HOLE 0x00000000, // DB_HTILE_SURFACE 0x00000000, // DB_SRESULTS_COMPARE_STATE0 0x00000000, // DB_SRESULTS_COMPARE_STATE1 0x00000000, // DB_PRELOAD_CONTROL 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0 - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1 - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2 - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE @@ -745,10 +744,10 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] = 0x00000000, // VGT_TESS_DISTRIBUTION 0x00000000, // VGT_SHADER_STAGES_EN 0x00000000, // VGT_LS_HS_CONFIG - 0x00000000, // VGT_GS_VERT_ITEMSIZE - 0x00000000, // VGT_GS_VERT_ITEMSIZE_1 - 0x00000000, // VGT_GS_VERT_ITEMSIZE_2 - 0x00000000, // VGT_GS_VERT_ITEMSIZE_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_TF_PARAM 0x00000000, // DB_ALPHA_TO_MASK 0, // 
HOLE @@ -759,11 +758,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] = 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET 0x00000000, // VGT_GS_INSTANCE_CNT - 0x00000000, // VGT_STRMOUT_CONFIG - 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG -}; -static const unsigned int gfx11_SECT_CONTEXT_def_8[] = -{ + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // PA_SC_CENTROID_PRIORITY_0 0x00000000, // PA_SC_CENTROID_PRIORITY_1 0x00001000, // PA_SC_LINE_CNTL @@ -797,126 +807,126 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] = 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL 0x00000000, // PA_SC_NGG_MODE_CNTL 0x00000000, // PA_SC_BINNER_CNTL_2 - 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL - 0x00000020, // VGT_OUT_DEALLOC_CNTL + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_BASE - 0x00000000, // CB_COLOR0_PITCH - 0x00000000, // CB_COLOR0_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_VIEW 0x00000000, // CB_COLOR0_INFO 0x00000000, // CB_COLOR0_ATTRIB - 0x00000000, // CB_COLOR0_DCC_CONTROL - 0x00000000, // CB_COLOR0_CMASK - 0x00000000, // CB_COLOR0_CMASK_SLICE - 0x00000000, // CB_COLOR0_FMASK - 0x00000000, // CB_COLOR0_FMASK_SLICE - 0x00000000, // CB_COLOR0_CLEAR_WORD0 - 0x00000000, // CB_COLOR0_CLEAR_WORD1 + 0x00000000, // CB_COLOR0_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR1_BASE - 0x00000000, // CB_COLOR1_PITCH - 0x00000000, // CB_COLOR1_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR1_VIEW 0x00000000, // CB_COLOR1_INFO 0x00000000, // CB_COLOR1_ATTRIB - 0x00000000, // CB_COLOR1_DCC_CONTROL - 0x00000000, // CB_COLOR1_CMASK - 0x00000000, // CB_COLOR1_CMASK_SLICE - 0x00000000, // CB_COLOR1_FMASK - 0x00000000, // CB_COLOR1_FMASK_SLICE - 0x00000000, // CB_COLOR1_CLEAR_WORD0 - 0x00000000, // CB_COLOR1_CLEAR_WORD1 + 0x00000000, // CB_COLOR1_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR1_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR2_BASE - 0x00000000, // CB_COLOR2_PITCH - 0x00000000, // CB_COLOR2_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR2_VIEW 0x00000000, // CB_COLOR2_INFO 0x00000000, // CB_COLOR2_ATTRIB - 0x00000000, // CB_COLOR2_DCC_CONTROL - 0x00000000, // CB_COLOR2_CMASK - 0x00000000, // CB_COLOR2_CMASK_SLICE - 0x00000000, // CB_COLOR2_FMASK - 0x00000000, // CB_COLOR2_FMASK_SLICE - 0x00000000, // CB_COLOR2_CLEAR_WORD0 - 0x00000000, // CB_COLOR2_CLEAR_WORD1 + 0x00000000, // CB_COLOR2_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR2_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR3_BASE - 0x00000000, // CB_COLOR3_PITCH - 0x00000000, // CB_COLOR3_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR3_VIEW 0x00000000, // CB_COLOR3_INFO 0x00000000, // CB_COLOR3_ATTRIB - 0x00000000, // CB_COLOR3_DCC_CONTROL - 0x00000000, // CB_COLOR3_CMASK - 0x00000000, // CB_COLOR3_CMASK_SLICE - 0x00000000, // CB_COLOR3_FMASK - 0x00000000, // CB_COLOR3_FMASK_SLICE - 0x00000000, // CB_COLOR3_CLEAR_WORD0 - 0x00000000, // CB_COLOR3_CLEAR_WORD1 + 0x00000000, // CB_COLOR3_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR3_DCC_BASE 0, // HOLE 0x00000000, // 
CB_COLOR4_BASE - 0x00000000, // CB_COLOR4_PITCH - 0x00000000, // CB_COLOR4_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR4_VIEW 0x00000000, // CB_COLOR4_INFO 0x00000000, // CB_COLOR4_ATTRIB - 0x00000000, // CB_COLOR4_DCC_CONTROL - 0x00000000, // CB_COLOR4_CMASK - 0x00000000, // CB_COLOR4_CMASK_SLICE - 0x00000000, // CB_COLOR4_FMASK - 0x00000000, // CB_COLOR4_FMASK_SLICE - 0x00000000, // CB_COLOR4_CLEAR_WORD0 - 0x00000000, // CB_COLOR4_CLEAR_WORD1 + 0x00000000, // CB_COLOR4_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR4_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR5_BASE - 0x00000000, // CB_COLOR5_PITCH - 0x00000000, // CB_COLOR5_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR5_VIEW 0x00000000, // CB_COLOR5_INFO 0x00000000, // CB_COLOR5_ATTRIB - 0x00000000, // CB_COLOR5_DCC_CONTROL - 0x00000000, // CB_COLOR5_CMASK - 0x00000000, // CB_COLOR5_CMASK_SLICE - 0x00000000, // CB_COLOR5_FMASK - 0x00000000, // CB_COLOR5_FMASK_SLICE - 0x00000000, // CB_COLOR5_CLEAR_WORD0 - 0x00000000, // CB_COLOR5_CLEAR_WORD1 + 0x00000000, // CB_COLOR5_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR5_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR6_BASE - 0x00000000, // CB_COLOR6_PITCH - 0x00000000, // CB_COLOR6_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR6_VIEW 0x00000000, // CB_COLOR6_INFO 0x00000000, // CB_COLOR6_ATTRIB - 0x00000000, // CB_COLOR6_DCC_CONTROL - 0x00000000, // CB_COLOR6_CMASK - 0x00000000, // CB_COLOR6_CMASK_SLICE - 0x00000000, // CB_COLOR6_FMASK - 0x00000000, // CB_COLOR6_FMASK_SLICE - 0x00000000, // CB_COLOR6_CLEAR_WORD0 - 0x00000000, // CB_COLOR6_CLEAR_WORD1 + 0x00000000, // CB_COLOR6_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR6_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR7_BASE - 0x00000000, // CB_COLOR7_PITCH - 0x00000000, // CB_COLOR7_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR7_VIEW 0x00000000, // CB_COLOR7_INFO 0x00000000, // CB_COLOR7_ATTRIB - 0x00000000, // CB_COLOR7_DCC_CONTROL - 0x00000000, // CB_COLOR7_CMASK - 0x00000000, // CB_COLOR7_CMASK_SLICE - 0x00000000, // CB_COLOR7_FMASK - 0x00000000, // CB_COLOR7_FMASK_SLICE - 0x00000000, // CB_COLOR7_CLEAR_WORD0 - 0x00000000, // CB_COLOR7_CLEAR_WORD1 + 0x00000000, // CB_COLOR7_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR7_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR0_BASE_EXT @@ -927,22 +937,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] = 0x00000000, // CB_COLOR5_BASE_EXT 0x00000000, // CB_COLOR6_BASE_EXT 0x00000000, // CB_COLOR7_BASE_EXT - 0x00000000, // CB_COLOR0_CMASK_BASE_EXT - 0x00000000, // CB_COLOR1_CMASK_BASE_EXT - 0x00000000, // CB_COLOR2_CMASK_BASE_EXT - 0x00000000, // CB_COLOR3_CMASK_BASE_EXT - 0x00000000, // CB_COLOR4_CMASK_BASE_EXT - 0x00000000, // CB_COLOR5_CMASK_BASE_EXT - 0x00000000, // CB_COLOR6_CMASK_BASE_EXT - 0x00000000, // CB_COLOR7_CMASK_BASE_EXT - 0x00000000, // CB_COLOR0_FMASK_BASE_EXT - 0x00000000, // CB_COLOR1_FMASK_BASE_EXT - 0x00000000, // CB_COLOR2_FMASK_BASE_EXT - 0x00000000, // CB_COLOR3_FMASK_BASE_EXT - 0x00000000, // CB_COLOR4_FMASK_BASE_EXT - 0x00000000, // CB_COLOR5_FMASK_BASE_EXT - 0x00000000, // CB_COLOR6_FMASK_BASE_EXT - 0x00000000, // CB_COLOR7_FMASK_BASE_EXT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 
0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_DCC_BASE_EXT 0x00000000, // CB_COLOR1_DCC_BASE_EXT 0x00000000, // CB_COLOR2_DCC_BASE_EXT @@ -976,8 +986,7 @@ static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] = {gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 }, {gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 }, {gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 }, - {gfx11_SECT_CONTEXT_def_7, 0x0000a2a5, 66 }, - {gfx11_SECT_CONTEXT_def_8, 0x0000a2f5, 203 }, + {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 }, { 0, 0, 0 } }; static const struct cs_section_def gfx11_cs_data[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c5f46d264b23..abf2bf7f1a79 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -53,7 +53,7 @@ * 2. Async ring */ #define GFX10_NUM_GFX_RINGS_NV1X 1 -#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1 +#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2 #define GFX10_MEC_HPD_SIZE 2048 #define F32_CE_PROGRAM_RAM_SIZE 65536 @@ -3780,11 +3780,12 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -3793,13 +3794,13 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) } amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -4711,6 +4712,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, { struct amdgpu_ring *ring; unsigned int irq_type; + unsigned int hw_prio; ring = &adev->gfx.gfx_ring[ring_id]; @@ -4728,8 +4730,10 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, - AMDGPU_RING_PRIO_DEFAULT, NULL); + hw_prio, NULL); } static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, @@ -4791,7 +4795,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -6581,6 +6585,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) } } +static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev, + struct v10_gfx_mqd *mqd, + struct amdgpu_mqd_prop *prop) +{ + bool priority = 0; + u32 tmp; + + /* set up default queue priority level + * 0x0 = low priority, 0x1 = high priority + */ + if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) + priority = 1; + + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); + mqd->cp_gfx_hqd_queue_priority = tmp; +} + static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, struct amdgpu_mqd_prop *prop) { @@ -6609,11 +6631,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; - /* set up default queue priority level - * 0x0 = low priority, 0x1 = high priority */ - tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); - tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); - mqd->cp_gfx_hqd_queue_priority = tmp; + /* set up gfx queue priority */ + gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop); /* set up time quantum */ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index a4a6751b1e44..942d41a65f2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4563,6 +4563,9 @@ static int gfx_v11_0_hw_init(void *handle) if (adev->gfx.imu.funcs->start_imu) adev->gfx.imu.funcs->start_imu(adev); } + + /* disable gpa mode in backdoor loading */ + gfx_v11_0_disable_gpa_mode(adev); } } @@ -4781,19 +4784,17 @@ static int gfx_v11_0_soft_reset(void *handle) /* Disable MEC parsing/prefetching */ gfx_v11_0_cp_compute_enable(adev, false); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - udelay(50); + udelay(50); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - } + tmp &= ~grbm_soft_reset; + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); /* Wait a little for things to settle down */ udelay(50); @@ -6293,6 +6294,11 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) { + if (adev->flags & AMD_IS_APU) + adev->gfx.imu.mode = MISSION_MODE; + else + adev->gfx.imu.mode 
= DEBUG_MODE; + adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 9077dfccaf3c..25d5743ae91b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -456,7 +456,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, flush_type); } - break; + if (!adev->enable_mes) + break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 7f4b480ae66e..edbdc0b934ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -37,6 +37,7 @@ #include "nbio_v4_3.h" #include "gfxhub_v3_0.h" #include "mmhub_v3_0.h" +#include "mmhub_v3_0_1.h" #include "mmhub_v3_0_2.h" #include "athub_v3_0.h" @@ -267,7 +268,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* For SRIOV run time, driver shouldn't access the register through MMIO * Directly use kiq to do the vm invalidation instead */ - if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes && + if ((adev->gfx.kiq.ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -343,7 +344,6 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, flush_type); } - break; } } @@ -548,6 +548,9 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[MMHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + adev->mmhub.funcs = &mmhub_v3_0_1_funcs; + break; case IP_VERSION(3, 0, 2): adev->mmhub.funcs = &mmhub_v3_0_2_funcs; break; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index d63d3f2b8a16..76383baa3929 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_imu.h" +#include "amdgpu_dpm.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" @@ -117,6 +118,25 @@ static int imu_v11_0_load_microcode(struct amdgpu_device *adev) return 0; } +static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev) +{ + int i, imu_reg_val = 0; + + for (i = 0; i < adev->usec_timeout; i++) { + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL); + if ((imu_reg_val & 0x1f) == 0x1f) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "init imu: IMU start timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + static void imu_v11_0_setup(struct amdgpu_device *adev) { int imu_reg_val; @@ -125,9 +145,11 @@ static void imu_v11_0_setup(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff); WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff); - imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16); - imu_reg_val |= 0x1; - WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); + if (adev->gfx.imu.mode == DEBUG_MODE) { + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16); + imu_reg_val |= 0x1; + WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); + } //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10); @@ -137,26 +159,17 @@ static void 
imu_v11_0_setup(struct amdgpu_device *adev) static int imu_v11_0_start(struct amdgpu_device *adev) { - int imu_reg_val, i; + int imu_reg_val; //Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0 imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL); imu_reg_val &= 0xfffffffe; WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val); - for (i = 0; i < adev->usec_timeout; i++) { - imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL); - if ((imu_reg_val & 0x1f) == 0x1f) - break; - udelay(1); - } + if (adev->flags & AMD_IS_APU) + amdgpu_dpm_set_gfx_power_up_by_imu(adev); - if (i >= adev->usec_timeout) { - dev_err(adev->dev, "init imu: IMU start timeout\n"); - return -ETIMEDOUT; - } - - return 0; + return imu_v11_0_wait_for_reset_status(adev); } static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] = @@ -364,4 +377,5 @@ const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = { .setup_imu = imu_v11_0_setup, .start_imu = imu_v11_0_start, .program_rlc_ram = imu_v11_0_program_rlc_ram, + .wait_for_reset_status = imu_v11_0_wait_for_reset_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 7eee004cf3ce..5bdc2babb070 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -156,7 +156,13 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, input->gang_global_priority_level; mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; - mes_add_queue_pkt.wptr_addr = input->wptr_addr; + + if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> + AMDGPU_MES_API_VERSION_SHIFT) >= 2) + mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; + else + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.paging = input->paging; @@ -165,6 +171,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gws_size = input->gws_size; mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; + mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; mes_add_queue_pkt.api_status.api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; @@ -267,6 +274,58 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) &mes_status_pkt, sizeof(mes_status_pkt)); } +static int mes_v11_0_misc_op(struct amdgpu_mes *mes, + struct mes_misc_op_input *input) +{ + union MESAPI__MISC misc_pkt; + + memset(&misc_pkt, 0, sizeof(misc_pkt)); + + misc_pkt.header.type = MES_API_TYPE_SCHEDULER; + misc_pkt.header.opcode = MES_SCH_API_MISC; + misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + switch (input->op) { + case MES_MISC_OP_READ_REG: + misc_pkt.opcode = MESAPI_MISC__READ_REG; + misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; + misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; + break; + case MES_MISC_OP_WRITE_REG: + misc_pkt.opcode = MESAPI_MISC__WRITE_REG; + misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; + misc_pkt.write_reg.reg_value = input->write_reg.reg_value; + break; + case MES_MISC_OP_WRM_REG_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = 0; + break; + case MES_MISC_OP_WRM_REG_WR_WAIT: + misc_pkt.opcode 
= MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; + break; + default: + DRM_ERROR("unsupported misc op (%d) \n", input->op); + return -EINVAL; + } + + misc_pkt.api_status.api_completion_fence_addr = + mes->ring.fence_drv.gpu_addr; + misc_pkt.api_status.api_completion_fence_value = + ++mes->ring.fence_drv.sync_seq; + + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &misc_pkt, sizeof(misc_pkt)); +} + static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) { int i; @@ -312,6 +371,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_reset = 1; mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.oversubscription_timer = 50; mes_set_hw_res_pkt.api_status.api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; @@ -328,6 +388,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .unmap_legacy_queue = mes_v11_0_unmap_legacy_queue, .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, + .misc_op = mes_v11_0_misc_op, }; static int mes_v11_0_init_microcode(struct amdgpu_device *adev, @@ -858,6 +919,18 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, mes_v11_0_queue_init_register(ring); } + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, pipe, 0, 0); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + return 0; } @@ -1120,6 +1193,7 @@ static int mes_v11_0_hw_init(void *handle) * with MES enabled. */ adev->gfx.kiq.ring.sched.ready = false; + adev->mes.ring.sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c new file mode 100644 index 000000000000..cac72ced94c8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -0,0 +1,555 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "mmhub_v3_0_1.h" + +#include "mmhub/mmhub_3_0_1_offset.h" +#include "mmhub/mmhub_3_0_1_sh_mask.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +#define regMMVM_L2_CNTL3_DEFAULT 0x80100007 +#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0 + +static const char *mmhub_client_ids_v3_0_1[][2] = { + [0][0] = "VMC", + [4][0] = "DCEDMC", + [5][0] = "DCEVGA", + [6][0] = "MP0", + [7][0] = "MP1", + [8][0] = "MPIO", + [16][0] = "HDP", + [17][0] = "LSDMA", + [18][0] = "JPEG", + [19][0] = "VCNU0", + [21][0] = "VSCH", + [22][0] = "VCNU1", + [23][0] = "VCN1", + [32+20][0] = "VCN0", + [2][1] = "DBGUNBIO", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [5][1] = "DCEVGA", + [6][1] = "MP0", + [7][1] = "MP1", + [8][1] = "MPIO", + [10][1] = "DBGU0", + [11][1] = "DBGU1", + [12][1] = "DBGU2", + [13][1] = "DBGU3", + [14][1] = "XDP", + [15][1] = "OSSSYS", + [16][1] = "HDP", + [17][1] = "LSDMA", + [18][1] = "JPEG", + [19][1] = "VCNU0", + [20][1] = "VCN0", + [21][1] = "VSCH", + [22][1] = "VCNU1", + [23][1] = "VCN1", +}; + +static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + uint32_t cid, rw; + const char *mmhub_cid = NULL; + + cid = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, CID); + rw = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, RW); + + dev_err(adev->dev, + "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + + switch (adev->ip_versions[MMHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; + break; + default: + mmhub_cid = NULL; + break; + } + + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + mmhub_cid ? 
mmhub_cid : "unknown", cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%x\n", rw); +} + +static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); +} + +static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + /* + * the new L1 policy will block SRIOV guest from writing + * these regs, and they will be programed at host. + * so skip programing these regs. + */ + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp); + + tmp = regMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp); + + tmp = regMMVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp); + + tmp = regMMVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); +} + +static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp); +} + +static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev) 
+{ + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} + +static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; +} + +static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev) +{ + /* GART Enable. 
*/ + mmhub_v3_0_1_init_gart_aperture_regs(adev); + mmhub_v3_0_1_init_system_aperture_regs(adev); + mmhub_v3_0_1_init_tlb_regs(adev); + mmhub_v3_0_1_init_cache_regs(adev); + + mmhub_v3_0_1_enable_system_domain(adev); + mmhub_v3_0_1_disable_identity_aperture(adev); + mmhub_v3_0_1_setup_vmid_config(adev); + mmhub_v3_0_1_program_invalidation(adev); + + return 0; +} + +static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = { + .print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status, + .get_invalidate_req = mmhub_v3_0_1_get_invalidate_req, +}; + +static void mmhub_v3_0_1_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + 
hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ - + regMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs; +} + +static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev) +{ + u64 base; + + base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE); + base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24; +} + +static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + //TODO +} + +static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + //TODO +} + +static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + mmhub_v3_0_1_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + mmhub_v3_0_1_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + return 0; +} + +static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ + //TODO +} + +const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = { + .init = mmhub_v3_0_1_init, + .get_fb_location = mmhub_v3_0_1_get_fb_location, + .get_mc_fb_offset = mmhub_v3_0_1_get_mc_fb_offset, + .gart_enable = mmhub_v3_0_1_gart_enable, + .set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default, + .gart_disable = mmhub_v3_0_1_gart_disable, + .set_clockgating = mmhub_v3_0_1_set_clockgating, + .get_clockgating = mmhub_v3_0_1_get_clockgating, + .setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h new file mode 100644 index 000000000000..4c1246735e7d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h @@ -0,0 +1,28 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V3_0_1_H__ +#define __MMHUB_V3_0_1_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index b81acf59870c..7ec5b5cf4bb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -284,7 +284,7 @@ flr_done: if (amdgpu_device_should_recover_gpu(adev) && (!amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover_imp(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 22c10b97ea81..e18b75c8fde6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -311,7 +311,7 @@ flr_done: adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover_imp(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL); } static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 7b63d30b9b79..c5016a926331 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -523,7 +523,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) /* Trigger recovery due to world switch failure */ if (amdgpu_device_should_recover_gpu(adev)) - amdgpu_device_gpu_recover_imp(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 6cd1fb2eb913..34c610b9157d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -547,7 +547,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev) { uint32_t reg, reg_data; - if (adev->asic_type != CHIP_SIENNA_CICHLID) + if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0)) return; reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index ed31d133f07a..982a89f841d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ 
b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -240,8 +240,11 @@ static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + if (enable) { data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | @@ -266,9 +269,12 @@ static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + /* TODO: need update in future */ def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + if (enable) { data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK; } else { data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK; @@ -344,6 +350,121 @@ static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev) return rom_offset; } +#ifdef CONFIG_PCIEASPM +static void nbio_v4_3_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data = 0x35EB; + data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2); + data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + if (adev->pdev->ltr_path) + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + else + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} +#endif + +static void nbio_v4_3_program_aspm(struct amdgpu_device *adev) +{ +#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) && + !(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0))) + return; + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 
0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + + def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4); + data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL); + data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data); + + nbio_v4_3_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL); + data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data); +#endif +} + const struct amdgpu_nbio_funcs nbio_v4_3_funcs = { .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset, @@ -365,4 +486,5 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = { .init_registers = nbio_v4_3_init_registers, .remap_hdp_registers = nbio_v4_3_remap_hdp_registers, .get_rom_offset = nbio_v4_3_get_rom_offset, + .program_aspm = nbio_v4_3_program_aspm, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index cdc0c9779848..e786b825cea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -58,11 +58,17 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE); + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE); u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_CSDMA_DOORBELL_RANGE, + OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_CSDMA_DOORBELL_RANGE, + SIZE, doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, GDC0_BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); doorbell_range = REG_SET_FIELD(doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 9e1ef81933ff..30386d34d0d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -39,7 +39,9 @@ 
MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -105,6 +107,10 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) err = psp_init_sos_microcode(psp, chip_name); if (err) return err; + /* It's not necessary to load ras ta on Guest side */ + err = psp_init_ta_microcode(psp, chip_name); + if (err) + return err; break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 9e18a2b22607..495848515edf 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -310,6 +310,7 @@ static enum amd_reset_method soc21_asic_reset_method(struct amdgpu_device *adev) { if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || amdgpu_reset_method == AMD_RESET_METHOD_BACO) return amdgpu_reset_method; @@ -320,6 +321,8 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): return AMD_RESET_METHOD_MODE1; + case IP_VERSION(13, 0, 4): + return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) return AMD_RESET_METHOD_BACO; @@ -341,6 +344,10 @@ static int soc21_asic_reset(struct amdgpu_device *adev) dev_info(adev->dev, "BACO reset\n"); ret = amdgpu_dpm_baco_reset(adev); break; + case AMD_RESET_METHOD_MODE2: + dev_info(adev->dev, "MODE2 reset\n"); + ret = amdgpu_dpm_mode2_reset(adev); + break; default: dev_info(adev->dev, "MODE1 reset\n"); ret = amdgpu_device_mode1_reset(adev); @@ -379,11 +386,12 @@ static void soc21_pcie_gen3_enable(struct amdgpu_device *adev) static void soc21_program_aspm(struct amdgpu_device *adev) { - - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; - /* todo */ + if (!(adev->flags & AMD_IS_APU) && + (adev->nbio.funcs->program_aspm)) + adev->nbio.funcs->program_aspm(adev); } static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev, @@ -555,8 +563,11 @@ static int soc21_common_early_init(void *handle) adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_VCN_MGCG | - AMD_CG_SUPPORT_JPEG_MGCG; + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 606892dbea1c..bf7524f16b66 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -119,6 +119,24 @@ static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *error_count += 1; umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + + if (ras->umc_ecc.record_ce_addr_supported) { + uint64_t err_addr, soc_pa; + uint32_t channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr; + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | + 
ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa); + } } } @@ -251,7 +269,9 @@ static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, - unsigned long *error_count) + unsigned long *error_count, + uint32_t ch_inst, + uint32_t umc_inst) { uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; uint32_t ecc_err_cnt, ecc_err_cnt_addr; @@ -295,6 +315,31 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, *error_count += 1; umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + + { + uint64_t err_addr, soc_pa; + uint32_t mc_umc_addrt0; + uint32_t channel_index; + + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa); + } } } @@ -395,7 +440,8 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, ch_inst); umc_v6_7_query_correctable_error_count(adev, umc_reg_offset, - &(err_data->ce_count)); + &(err_data->ce_count), + ch_inst, umc_inst); umc_v6_7_querry_uncorrectable_error_count(adev, umc_reg_offset, &(err_data->ue_count)); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 9119e966ffff..84ac2401895a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -29,7 +29,6 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" -#include "vcn_sw_ring.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -45,15 +44,12 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 -bool unifiedQ_enabled = false; - static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 }; -static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); -static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); @@ -71,36 +67,15 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (unifiedQ_enabled) { - adev->vcn.num_vcn_inst = 1; - adev->vcn.num_enc_rings = 1; - } else { - adev->vcn.num_enc_rings = 2; - } - - if (!unifiedQ_enabled) - vcn_v4_0_set_dec_ring_funcs(adev); + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; - vcn_v4_0_set_enc_ring_funcs(adev); + vcn_v4_0_set_unified_ring_funcs(adev); vcn_v4_0_set_irq_funcs(adev); return 0; } -static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) -{ - if 
(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - const struct common_firmware_header *hdr; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); - } -} - /** * vcn_v4_0_sw_init - sw init for VCN block * @@ -111,17 +86,14 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) static int vcn_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; - int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r; r = amdgpu_vcn_sw_init(adev); if (r) return r; - if (unifiedQ_enabled) - amdgpu_vcn_setup_unified_queue_ucode(adev); - else - amdgpu_vcn_setup_ucode(adev); + amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); if (r) @@ -129,81 +101,40 @@ static int vcn_v4_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; + if (adev->vcn.harvest_config & (1 << i)) continue; - /* VCN DEC TRAP */ + + atomic_set(&adev->vcn.inst[i].sched_score, 0); + + /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq); + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; - atomic_set(&adev->vcn.inst[i].sched_score, 0); - if (!unifiedQ_enabled) { - ring = &adev->vcn.inst[i].ring_dec; - ring->use_doorbell = true; - - /* VCN4 doorbell layout - * 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg) - * 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue) - * 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue) - * 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved) - * 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue) - */ - - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) - + 5 + 8 * i; - - sprintf(ring->name, "vcn_dec_%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - AMDGPU_RING_PRIO_DEFAULT, - &adev->vcn.inst[i].sched_score); - if (r) - return r; - } - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - /* VCN ENC TRAP */ - r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); - if (r) - return r; - - ring = &adev->vcn.inst[i].ring_enc[j]; - ring->use_doorbell = true; - - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; - - if (unifiedQ_enabled) { - sprintf(ring->name, "vcn_unified%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - } else { - enum amdgpu_ring_priority_level hw_prio; + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - hw_prio = amdgpu_vcn_get_enc_ring_prio(j); - sprintf(ring->name, "vcn_enc_%d.%d", i, j); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - hw_prio, &adev->vcn.inst[i].sched_score); - } - if (r) - return r; - } + sprintf(ring->name, "vcn_unified_%d", i); - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = 0; + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); + if (r) + return r; - if (unifiedQ_enabled) { - fw_shared->present_flag_0 |= 
cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = 1; - } + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } - if (!unifiedQ_enabled) { - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; + return 0; } @@ -220,19 +151,19 @@ static int vcn_v4_0_sw_fini(void *handle) int i, r, idx; if (drm_dev_enter(&adev->ddev, &idx)) { - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + continue; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = 0; - fw_shared->sq.is_enabled = 0; - } + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } - drm_dev_exit(idx); - } + drm_dev_exit(idx); + } r = amdgpu_vcn_suspend(adev); if (r) @@ -254,15 +185,13 @@ static int vcn_v4_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j, r; + int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - if (unifiedQ_enabled) - ring = &adev->vcn.inst[i].ring_enc[0]; - else - ring = &adev->vcn.inst[i].ring_dec; + + ring = &adev->vcn.inst[i].ring_enc[0]; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); @@ -270,13 +199,6 @@ static int vcn_v4_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) goto done; - - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; - } } done: @@ -464,7 +386,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); - } if (!indirect) @@ -888,7 +809,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; - int i; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -974,74 +894,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); - if (unifiedQ_enabled) { - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - } else - ring = &adev->vcn.inst[inst_idx].ring_dec; - - WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL, - ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | - VCN_RB4_DB_CTRL__EN_MASK); - - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4, - upper_32_bits(ring->gpu_addr)); + ring = 
&adev->vcn.inst[inst_idx].ring_enc[0]; - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); - /* reseting ring, fw should not check RB ring */ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); - /* Initialize the ring buffer's read and write pointers */ - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB4_EN_MASK; + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0); - - if (unifiedQ_enabled) - fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET; - - for (i = 0; i < adev->vcn.num_enc_rings; i++) { - ring = &adev->vcn.inst[inst_idx].ring_enc[i]; - - if (i) { - ring = &adev->vcn.inst[inst_idx].ring_enc[1]; - - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4); - tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); - - WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL, - ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | - VCN_RB2_DB_CTRL__EN_MASK); - } else { - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); - tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); - WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - } - } return 0; } @@ -1064,6 +942,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); continue; @@ -1081,15 +961,15 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* enable VCPU clock */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); /* disable master 
interrupt */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* enable LMI MC and UMC channels */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; @@ -1099,10 +979,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* setup regUVD_LMI_CTRL */ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); /* setup regUVD_MPC_CNTL */ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); @@ -1112,37 +992,37 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* setup UVD_MPC_SET_MUXA0 */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); /* setup UVD_MPC_SET_MUXB0 */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); /* setup UVD_MPC_SET_MUX */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); vcn_v4_0_mc_resume(adev, i); /* VCN global tiling registers */ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + adev->gfx.config.gb_addr_config); /* unblock VCPU register access */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); /* release VCPU reset to boot */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + ~UVD_VCPU_CNTL__BLK_RST_MASK); for (j = 0; j < 10; ++j) { uint32_t status; @@ -1166,13 +1046,13 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) if (status & 2) break; - dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); r = -1; @@ -1180,78 +1060,43 @@ static int 
vcn_v4_0_start(struct amdgpu_device *adev) } if (r) { - dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i); + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); return r; } /* enable master interrupt */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* clear the busy bit of VCN_STATUS */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - if (unifiedQ_enabled) { - ring = &adev->vcn.inst[i].ring_enc[0]; - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - } else { - ring = &adev->vcn.inst[i].ring_dec; + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL, - ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | - VCN_RB4_DB_CTRL__EN_MASK); - - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4, - upper_32_bits(ring->gpu_addr)); - - WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); - - /* resetting ring, fw should not check RB ring */ - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); - /* Initialize the ring buffer's read and write pointers */ - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB4_EN_MASK; - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); - } - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - if (unifiedQ_enabled) - fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - else { - ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL, - ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | - VCN_RB2_DB_CTRL__EN_MASK); - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, 
regUVD_RB_SIZE2, ring->ring_size / 4);
-	}
+
+		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+		tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+		fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
 	}

 	return 0;
@@ -1277,12 +1122,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);

-	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);
-	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
-
-	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
-	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF);
-
 	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
 		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

@@ -1301,10 +1140,14 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
  */
 static int vcn_v4_0_stop(struct amdgpu_device *adev)
 {
+	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
 	uint32_t tmp;
 	int i, r = 0;

 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
 			r = vcn_v4_0_stop_dpg_mode(adev, i);
 			continue;
@@ -1414,8 +1257,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
 			/* unpause dpg, no need to wait */
 			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
 			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
-			SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
-				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
 		}
 		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
 	}
@@ -1424,165 +1265,72 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
 }

 /**
- * vcn_v4_0_dec_ring_get_rptr - get read pointer
+ * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
  *
  * @ring: amdgpu_ring pointer
  *
- * Returns the current hardware read pointer
+ * Returns the current hardware unified read pointer
  */
-static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;

-	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4);
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
 }

 /**
- * vcn_v4_0_dec_ring_get_wptr - get write pointer
+ * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
  *
  * @ring: amdgpu_ring pointer
  *
- * Returns the current hardware write pointer
+ * Returns the current hardware unified write pointer
  */
-static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;

+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+		DRM_ERROR("wrong ring id is identified in %s", __func__);
+
 	if (ring->use_doorbell)
 		return *ring->wptr_cpu_addr;
 	else
-		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4);
+		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
 }

 /**
- * vcn_v4_0_dec_ring_set_wptr - set write pointer
+ * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
  *
  * @ring: amdgpu_ring pointer
  *
- * Commits the write pointer to the hardware
+ * Commits the enc write pointer to the hardware
  */
-static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;

-	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
-		WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2,
-			lower_32_bits(ring->wptr));
-	}
+	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+		DRM_ERROR("wrong ring id is identified in %s", __func__);

 	if (ring->use_doorbell) {
 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 	} else {
-		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr));
-	}
-}
-
-static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = {
-	.type = AMDGPU_RING_TYPE_VCN_DEC,
-	.align_mask = 0x3f,
-	.nop = VCN_DEC_SW_CMD_NO_OP,
-	.vmhub = AMDGPU_MMHUB_0,
-	.get_rptr = vcn_v4_0_dec_ring_get_rptr,
-	.get_wptr = vcn_v4_0_dec_ring_get_wptr,
-	.set_wptr = vcn_v4_0_dec_ring_set_wptr,
-	.emit_frame_size =
-		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
-		VCN_SW_RING_EMIT_FRAME_SIZE,
-	.emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
-	.emit_ib = vcn_dec_sw_ring_emit_ib,
-	.emit_fence = vcn_dec_sw_ring_emit_fence,
-	.emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
-	.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
-	.test_ib = amdgpu_vcn_dec_sw_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
-	.insert_end = vcn_dec_sw_ring_insert_end,
-	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.begin_use = amdgpu_vcn_ring_begin_use,
-	.end_use = amdgpu_vcn_ring_end_use,
-	.emit_wreg = vcn_dec_sw_ring_emit_wreg,
-	.emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
-	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-/**
- * vcn_v4_0_enc_ring_get_rptr - get enc read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware enc read pointer
- */
-static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
-		return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
-	else
-		return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2);
-}
-
-/**
- * vcn_v4_0_enc_ring_get_wptr - get enc write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware enc write pointer
- */
-static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
-		if (ring->use_doorbell)
-			return *ring->wptr_cpu_addr;
-		else
-			return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
-	} else {
-		if (ring->use_doorbell)
-			return *ring->wptr_cpu_addr;
-		else
-			return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2);
-	}
-}
-
-/**
- * vcn_v4_0_enc_ring_set_wptr - set enc write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the enc write pointer to the hardware
- */
-static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
-		if (ring->use_doorbell) {
-			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
-			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
-		} else {
-			WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
-		}
-	} else {
-		if (ring->use_doorbell) {
-			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
-			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
-		} else {
-			WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr));
-		}
+		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
 	}
 }

-static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
+static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_ENC,
 	.align_mask = 0x3f,
 	.nop = VCN_ENC_CMD_NO_OP,
 	.vmhub = AMDGPU_MMHUB_0,
-	.get_rptr = vcn_v4_0_enc_ring_get_rptr,
-	.get_wptr = vcn_v4_0_enc_ring_get_wptr,
-	.set_wptr = vcn_v4_0_enc_ring_set_wptr,
+	.get_rptr = vcn_v4_0_unified_ring_get_rptr,
+	.get_wptr = vcn_v4_0_unified_ring_get_wptr,
+	.set_wptr = vcn_v4_0_unified_ring_set_wptr,
 	.emit_frame_size =
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
@@ -1594,7 +1342,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
 	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
 	.test_ring = amdgpu_vcn_enc_ring_test_ring,
-	.test_ib = amdgpu_vcn_enc_ring_test_ib,
+	.test_ib = amdgpu_vcn_unified_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.insert_end = vcn_v2_0_enc_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -1606,13 +1354,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
 };

 /**
- * vcn_v4_0_set_dec_ring_funcs - set dec ring functions
+ * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
  *
  * @adev: amdgpu_device pointer
  *
- * Set decode ring functions
+ * Set unified ring functions
  */
-static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
 {
 	int i;

@@ -1620,32 +1368,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;

-		adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs;
-		adev->vcn.inst[i].ring_dec.me = i;
-		DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i);
-	}
-}
-
-/**
- * vcn_v4_0_set_enc_ring_funcs - set enc ring functions
- *
- * @adev: amdgpu_device pointer
- *
- * Set encode ring functions
- */
-static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev)
-{
-	int i, j;
-
-	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
-		if (adev->vcn.harvest_config & (1 << i))
-			continue;
+		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs;
+		adev->vcn.inst[i].ring_enc[0].me = i;

-		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
-			adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs;
-			adev->vcn.inst[i].ring_enc[j].me = i;
-		}
-		DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
+		DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
 	}
 }

@@ -1798,18 +1524,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
 	DRM_DEBUG("IH: VCN TRAP\n");

 	switch (entry->src_id) {
-	case VCN_4_0__SRCID__UVD_TRAP:
-		if (!unifiedQ_enabled) {
-			amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
-			break;
-		}
-		break;
 	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
 		break;
-	case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY:
-		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
-		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
 			  entry->src_id, entry->src_data[0]);