| author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-01 17:48:47 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-01 17:48:47 -0800 |
| commit | 4bf772b14675411a69b3c807f73006de0fe4b649 (patch) | |
| tree | b841e3ba0e3429695589cb0ab73871fa12f42c38 /drivers/gpu/drm/amd/amdgpu | |
| parent | 3879ae653a3e98380fe2daf653338830b7ca0097 (diff) | |
| parent | 24b8ef699e8221d2b7f813adaab13eec053e1507 (diff) | |
Merge tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This seems to have been a comparatively quieter merge window, I assume
due to holidays etc. The "biggest" change is AMD header cleanups, which
merge/remove a bunch of them. The AMD gpu scheduler is now being made generic
with the etnaviv driver wanting to reuse the code, hopefully other drivers
can go in the same direction.
Otherwise it's the usual lots of stuff in i915/amdgpu, not so much stuff
elsewhere.
Core:
- Add .last_close and .output_poll_changed helpers to reduce driver footprints
- Fix plane clipping
- Improved debug printing support
- Add panel orientation property
- Update edid derived properties at edid setting
- Reduction in fbdev driver footprint
- Move amdgpu scheduler into core for other drivers to use.
i915:
- Selftest and IGT improvements
- Fast boot prep work on IPS, pipe config
- HW workarounds for Cannonlake, Geminilake
- Cannonlake clock and HDMI2.0 fixes
- GPU cache invalidation and context switch improvements
- Display planes cleanup
- New PMU interface for perf queries
- New firmware support for KBL/SKL
- Geminilake HW workaround for performance
- Coffeelake stolen memory improvements
- GPU reset robustness work
- Cannonlake horizontal plane flipping
- GVT work
amdgpu/radeon:
- RV and Vega header file cleanups (lots of lines gone!)
- TTM operation context support
- 48-bit GPUVM support for Vega/RV
- ECC support for Vega
- Resizeable BAR support
- Multi-display sync support
- Enable swapout for reserved BOs during allocation
- S3 fixes on Raven
- GPU reset cleanup and fixes
- 2+1 level GPU page table
amdkfd:
- GFX7/8 SDMA user queues support
- Hardware scheduling for multiple processes
- dGPU prep work
rcar:
- Added R8A7743/5 support
- System suspend/resume support
sun4i:
- Multi-plane support for YUV formats
- A83T and LVDS support
msm:
- Devfreq support for GPU
tegra:
- Prep work for adding Tegra186 support
- Tegra186 HDMI support
- HDMI2.0 and zpos support by using generic helpers
tilcdc:
- Misc fixes
omapdrm:
- Support memory bandwidth limits
- DSI command mode panel cleanups
- DMM error handling
exynos:
- Drop the old IPP subdriver
etnaviv:
- Occlusion query fixes
- Job handling fixes
- Prep work for hooking in gpu scheduler
armada:
- Move closer to atomic modesetting
- Allow disabling primary plane if overlay is full screen
imx:
- Format modifier support
- Add tile prefetch to PRE
- Runtime PM support for PRG
ast:
- fix LUT loading"
* tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux: (1471 commits)
drm/ast: Load lut in crtc_commit
drm: Check for lessee in DROP_MASTER ioctl
drm: fix gpu scheduler link order
drm/amd/display: Demote error print to debug print when ATOM impl missing
dma-buf: fix reservation_object_wait_timeout_rcu once more v2
drm/amdgpu: Avoid leaking PM domain on driver unbind (v2)
drm/amd/amdgpu: Add Polaris version check
drm/amdgpu: Reenable manual GPU reset from sysfs
drm/amdgpu: disable MMHUB power gating on raven
drm/ttm: Don't unreserve swapped BOs that were previously reserved
drm/ttm: Don't add swapped BOs to swap-LRU list
drm/amdgpu: only check for ECC on Vega10
drm/amd/powerplay: Fix smu_table_entry.handle type
drm/ttm: add VADDR_FLAG_UPDATED_COUNT to correctly update dma_page global count
drm: Fix PANEL_ORIENTATION_QUIRKS breaking the Kconfig DRM menuconfig
drm/radeon: fill in rb backend map on evergreen/ni.
drm/amdgpu/gfx9: fix ngg enablement to clear gds reserved memory (v2)
drm/ttm: only free pages rather than update global memory count together
drm/amdgpu: fix CPU based VM updates
drm/amdgpu: fix typo in amdgpu_vce_validate_bo
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
118 files changed, 5948 insertions, 4847 deletions
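The headline core change, moving the AMD GPU scheduler into DRM core, shows up throughout the diff below as a mechanical rename: amd_sched_* types and "gpu_scheduler.h" become drm_sched_* and <drm/gpu_scheduler.h>. For orientation, here is a minimal sketch of the driver side of the shared scheduler, assuming the drm_sched_backend_ops callbacks as they existed around v4.16 (.dependency, .run_job, .timedout_job, .free_job); the mydrv_* names and the hardware-submit helper are hypothetical, not part of this merge:

/*
 * Hypothetical driver glue for the shared GPU scheduler; everything
 * named mydrv_* is illustrative, not from this merge.
 */
#include <drm/gpu_scheduler.h>
#include <linux/slab.h>

struct mydrv_job {
    struct drm_sched_job base;  /* scheduler bookkeeping, embedded first */
    /* driver-private ring/payload state would live here */
};

/* hypothetical helper: pushes the job to the ring, returns its HW fence */
static struct dma_fence *mydrv_hw_submit(struct mydrv_job *job);

/* called once all dependencies of the job have signalled */
static struct dma_fence *mydrv_run_job(struct drm_sched_job *sched_job)
{
    struct mydrv_job *job = container_of(sched_job, struct mydrv_job, base);

    return mydrv_hw_submit(job);
}

/* called when the job outlives the scheduler's timeout */
static void mydrv_timedout_job(struct drm_sched_job *sched_job)
{
    /* kick the driver's reset/recovery path here */
}

static void mydrv_free_job(struct drm_sched_job *sched_job)
{
    kfree(container_of(sched_job, struct mydrv_job, base));
}

static const struct drm_sched_backend_ops mydrv_sched_ops = {
    .run_job      = mydrv_run_job,
    .timedout_job = mydrv_timedout_job,
    .free_job     = mydrv_free_job,
};

amdgpu's own hook-up is visible below as the one-line change of amdgpu_sched_ops from amd_sched_backend_ops to drm_sched_backend_ops.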
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 90202cf4cd1e..d6e5b7273853 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -52,7 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o
+	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \
+	amdgpu_ids.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
 
 # add GMC block
 amdgpu-y += \
@@ -135,10 +136,7 @@ amdgpu-y += \
 amdgpu-y += amdgpu_cgs.o
 
 # GPU scheduler
-amdgpu-y += \
-	../scheduler/gpu_scheduler.o \
-	../scheduler/sched_fence.o \
-	amdgpu_job.o
+amdgpu-y += amdgpu_job.o
 
 # ACP componet
 ifneq ($(CONFIG_DRM_AMD_ACP),)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0b14b5373783..d5a2eefd6c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -45,8 +45,11 @@
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/gpu_scheduler.h>
 
 #include <kgd_kfd_interface.h>
+#include "dm_pp_interface.h"
+#include "kgd_pp_interface.h"
 
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
@@ -59,7 +62,6 @@
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_vm.h"
-#include "amd_powerplay.h"
 #include "amdgpu_dpm.h"
 #include "amdgpu_acp.h"
 #include "amdgpu_uvd.h"
@@ -67,10 +69,9 @@
 #include "amdgpu_vcn.h"
 #include "amdgpu_mn.h"
 #include "amdgpu_dm.h"
-
-#include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
 #include "amdgpu_gart.h"
+#include "amdgpu_debugfs.h"
 
 /*
  * Modules parameters.
@@ -125,6 +126,7 @@ extern int amdgpu_param_buf_per_se;
 extern int amdgpu_job_hang_limit;
 extern int amdgpu_lbpw;
 extern int amdgpu_compute_multipipe;
+extern int amdgpu_gpu_recovery;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -177,6 +179,10 @@ extern int amdgpu_cik_support;
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
 
+/* GPU RESET flags */
+#define AMDGPU_RESET_INFO_VRAM_LOST  (1 << 0)
+#define AMDGPU_RESET_INFO_FULLRESET  (1 << 1)
+
 struct amdgpu_device;
 struct amdgpu_ib;
 struct amdgpu_cs_parser;
@@ -218,17 +224,18 @@ enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
 
-int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
-				 enum amd_ip_block_type block_type,
-				 enum amd_clockgating_state state);
-int amdgpu_set_powergating_state(struct amdgpu_device *adev,
-				 enum amd_ip_block_type block_type,
-				 enum amd_powergating_state state);
-void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags);
-int amdgpu_wait_for_idle(struct amdgpu_device *adev,
-			 enum amd_ip_block_type block_type);
-bool amdgpu_is_idle(struct amdgpu_device *adev,
-		    enum amd_ip_block_type block_type);
+int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+					   enum amd_ip_block_type block_type,
+					   enum amd_clockgating_state state);
+int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+					   enum amd_ip_block_type block_type,
+					   enum amd_powergating_state state);
+void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
+					    u32 *flags);
+int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
+				   enum amd_ip_block_type block_type);
+bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
+			      enum amd_ip_block_type block_type);
 
 #define AMDGPU_MAX_IP_NUM 16
 
@@ -253,15 +260,16 @@ struct amdgpu_ip_block {
 	const struct amdgpu_ip_block_version *version;
 };
 
-int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev,
-				enum amd_ip_block_type type,
-				u32 major, u32 minor);
+int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
+				       enum amd_ip_block_type type,
+				       u32 major, u32 minor);
 
-struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev,
-					     enum amd_ip_block_type type);
+struct amdgpu_ip_block *
+amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
+			      enum amd_ip_block_type type);
 
-int amdgpu_ip_block_add(struct amdgpu_device *adev,
-			const struct amdgpu_ip_block_version *ip_block_version);
+int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
+			       const struct amdgpu_ip_block_version *ip_block_version);
 /* provided by hw blocks that can move/clear data.  e.g., gfx or sdma */
 struct amdgpu_buffer_funcs {
@@ -341,8 +349,9 @@ struct amdgpu_gart_funcs {
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
 	/* get the pde for a given mc addr */
-	u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr);
-	uint32_t (*get_invalidate_req)(unsigned int vm_id);
+	void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+			   u64 *dst, u64 *flags);
+	uint32_t (*get_invalidate_req)(unsigned int vmid);
 };
 
 /* provided by the ih block */
@@ -368,9 +377,6 @@ struct amdgpu_dummy_page {
 	struct page *page;
 	dma_addr_t addr;
 };
-int amdgpu_dummy_page_init(struct amdgpu_device *adev);
-void amdgpu_dummy_page_fini(struct amdgpu_device *adev);
-
 
 /*
  * Clocks
@@ -418,7 +424,6 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
 
 /* sub-allocation manager, it has to be protected by another lock.
  * By conception this is an helper for other part of the driver
@@ -535,6 +540,7 @@ struct amdgpu_mc {
 	u64					private_aperture_end;
 	/* protects concurrent invalidation */
 	spinlock_t				invalidate_lock;
+	bool					translate_further;
 };
 
 /*
@@ -645,12 +651,6 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
 	AMDGPU_DOORBELL64_INVALID		= 0xFFFF
 } AMDGPU_DOORBELL64_ASSIGNMENT;
 
-
-void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
-				  phys_addr_t *aperture_base,
-				  size_t *aperture_size,
-				  size_t *start_offset);
-
 /*
  * IRQS.
 */
@@ -684,7 +684,7 @@ struct amdgpu_ib {
 	uint32_t			flags;
 };
 
-extern const struct amd_sched_backend_ops amdgpu_sched_ops;
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 		     struct amdgpu_job **job, struct amdgpu_vm *vm);
@@ -694,7 +694,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
-		      struct amd_sched_entity *entity, void *owner,
+		      struct drm_sched_entity *entity, void *owner,
 		      struct dma_fence **f);
 
 /*
@@ -727,7 +727,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 struct amdgpu_ctx_ring {
 	uint64_t		sequence;
 	struct dma_fence	**fences;
-	struct amd_sched_entity	entity;
+	struct drm_sched_entity	entity;
 };
 
 struct amdgpu_ctx {
@@ -735,14 +735,16 @@ struct amdgpu_ctx {
 	struct amdgpu_device	*adev;
 	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
+	unsigned		reset_counter_query;
 	uint32_t		vram_lost_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool			preamble_presented;
-	enum amd_sched_priority init_priority;
-	enum amd_sched_priority override_priority;
+	enum drm_sched_priority init_priority;
+	enum drm_sched_priority override_priority;
 	struct mutex		lock;
+	atomic_t		guilty;
 };
 
 struct amdgpu_ctx_mgr {
@@ -760,7 +762,7 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				       struct amdgpu_ring *ring, uint64_t seq);
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
-				  enum amd_sched_priority priority);
+				  enum drm_sched_priority priority);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
@@ -957,6 +959,7 @@ struct amdgpu_gfx_config {
 };
 
 struct amdgpu_cu_info {
+	uint32_t simd_per_cu;
 	uint32_t max_waves_per_simd;
 	uint32_t wave_front_size;
 	uint32_t max_scratch_slots_per_cu;
@@ -1109,12 +1112,11 @@ struct amdgpu_cs_parser {
 #define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occured */
 
 struct amdgpu_job {
-	struct amd_sched_job    base;
+	struct drm_sched_job    base;
 	struct amdgpu_device	*adev;
 	struct amdgpu_vm	*vm;
 	struct amdgpu_ring	*ring;
 	struct amdgpu_sync	sync;
-	struct amdgpu_sync	dep_sync;
 	struct amdgpu_sync	sched_sync;
 	struct amdgpu_ib	*ibs;
 	struct dma_fence	*fence; /* the hw fence */
@@ -1123,7 +1125,7 @@ struct amdgpu_job {
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
 	bool                    vm_needs_flush;
-	unsigned		vm_id;
+	unsigned		vmid;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
 	uint32_t		gws_base, gws_size;
@@ -1164,10 +1166,10 @@ struct amdgpu_wb {
 	unsigned long		used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
 };
 
-int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb);
-void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb);
+int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
+void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
 
-void amdgpu_get_pcie_info(struct amdgpu_device *adev);
+void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 
 /*
  * SDMA
@@ -1232,24 +1234,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
  */
 void amdgpu_test_moves(struct amdgpu_device *adev);
 
-/*
- * Debugfs
- */
-struct amdgpu_debugfs {
-	const struct drm_info_list	*files;
-	unsigned		num_files;
-};
-
-int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
-			     const struct drm_info_list *files,
-			     unsigned nfiles);
-int amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
-
-#if defined(CONFIG_DEBUG_FS)
-int amdgpu_debugfs_init(struct drm_minor *minor);
-#endif
-
-int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
 
 /*
  * amdgpu smumgr functions
@@ -1404,8 +1388,6 @@ struct amdgpu_fw_vram_usage {
 	void *va;
 };
 
-int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev);
-
 /*
  * CGS
 */
@@ -1421,6 +1403,87 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
 typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
 
+
+/*
+ * amdgpu nbio functions
+ *
+ */
+struct nbio_hdp_flush_reg {
+	u32 ref_and_mask_cp0;
+	u32 ref_and_mask_cp1;
+	u32 ref_and_mask_cp2;
+	u32 ref_and_mask_cp3;
+	u32 ref_and_mask_cp4;
+	u32 ref_and_mask_cp5;
+	u32 ref_and_mask_cp6;
+	u32 ref_and_mask_cp7;
+	u32 ref_and_mask_cp8;
+	u32 ref_and_mask_cp9;
+	u32 ref_and_mask_sdma0;
+	u32 ref_and_mask_sdma1;
+};
+
+struct amdgpu_nbio_funcs {
+	const struct nbio_hdp_flush_reg *hdp_flush_reg;
+	u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
+	u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev);
+	u32 (*get_pcie_index_offset)(struct amdgpu_device *adev);
+	u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
+	u32 (*get_rev_id)(struct amdgpu_device *adev);
+	void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
+	void (*hdp_flush)(struct amdgpu_device *adev);
+	u32 (*get_memsize)(struct amdgpu_device *adev);
+	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
+				    bool use_doorbell, int doorbell_index);
+	void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
+					 bool enable);
+	void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
+						  bool enable);
+	void (*ih_doorbell_range)(struct amdgpu_device *adev,
+				  bool use_doorbell, int doorbell_index);
+	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+						 bool enable);
+	void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
+						bool enable);
+	void (*get_clockgating_state)(struct amdgpu_device *adev,
+				      u32 *flags);
+	void (*ih_control)(struct amdgpu_device *adev);
+	void (*init_registers)(struct amdgpu_device *adev);
+	void (*detect_hw_virt)(struct amdgpu_device *adev);
+};
+
+
+/* Define the HW IP blocks will be used in driver , add more if necessary */
+enum amd_hw_ip_block_type {
+	GC_HWIP = 1,
+	HDP_HWIP,
+	SDMA0_HWIP,
+	SDMA1_HWIP,
+	MMHUB_HWIP,
+	ATHUB_HWIP,
+	NBIO_HWIP,
+	MP0_HWIP,
+	UVD_HWIP,
+	VCN_HWIP = UVD_HWIP,
+	VCE_HWIP,
+	DF_HWIP,
+	DCE_HWIP,
+	OSSSYS_HWIP,
+	SMUIO_HWIP,
+	PWR_HWIP,
+	NBIF_HWIP,
+	MAX_HWIP
+};
+
+#define HWIP_MAX_INSTANCE	6
+
+struct amd_powerplay {
+	struct cgs_device *cgs_device;
+	void *pp_handle;
+	const struct amd_ip_funcs *ip_funcs;
+	const struct amd_pm_funcs *pp_funcs;
+};
+
 #define AMDGPU_RESET_MAGIC_NUM 64
 struct amdgpu_device {
 	struct device			*dev;
@@ -1606,6 +1669,11 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	struct kfd_dev          *kfd;
 
+	/* soc15 register offset based on ip, instance and segment */
+	uint32_t		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+
+	const struct amdgpu_nbio_funcs	*nbio_funcs;
+
 	/* delayed work_func for deferring clockgating during resume */
 	struct delayed_work     late_init_work;
 
@@ -1616,9 +1684,6 @@ struct amdgpu_device {
 	/* link all shadow bo */
 	struct list_head		shadow_list;
 	struct mutex			shadow_list_lock;
-	/* link all gtt */
-	spinlock_t			gtt_list_lock;
-	struct list_head		gtt_list;
 	/* keep an lru list of rings by HW IP */
 	struct list_head		ring_lru_list;
 	spinlock_t			ring_lru_list_lock;
@@ -1629,7 +1694,8 @@ struct amdgpu_device {
 	/* record last mm index being written through WREG32*/
 	unsigned long last_mm_index;
-	bool                            in_sriov_reset;
+	bool                            in_gpu_reset;
+	struct mutex  lock_reset;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1773,7 +1839,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr))
+#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -1784,7 +1850,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
-#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c))
+#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
 #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
@@ -1823,22 +1889,25 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 
 /* Common functions */
-int amdgpu_gpu_reset(struct amdgpu_device *adev);
-bool amdgpu_need_backup(struct amdgpu_device *adev);
-void amdgpu_pci_config_reset(struct amdgpu_device *adev);
-bool amdgpu_need_post(struct amdgpu_device *adev);
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+			      struct amdgpu_job* job, bool force);
+void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
+bool amdgpu_device_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
-void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
-void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
+void amdgpu_device_vram_location(struct amdgpu_device *adev,
+				 struct amdgpu_mc *mc, u64 base);
+void amdgpu_device_gart_location(struct amdgpu_device *adev,
+				 struct amdgpu_mc *mc);
+int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
-void amdgpu_program_register_sequence(struct amdgpu_device *adev,
+void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size);
 
@@ -1872,7 +1941,7 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
 				 struct drm_file *file_priv);
-int amdgpu_suspend(struct amdgpu_device *adev);
+int amdgpu_device_ip_suspend(struct amdgpu_device *adev);
 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index c04f44a90392..a29362f9ef41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -277,7 +277,7 @@ static int acp_hw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	const struct amdgpu_ip_block *ip_block =
-		amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
+		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
 
 	if (!ip_block)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 5432af39a674..1d605e1c1d66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -85,7 +85,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 		break;
 	default:
-		dev_info(adev->dev, "kfd not supported on this ASIC\n");
+		dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
 		return;
 	}
 
@@ -93,6 +93,39 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 					adev->pdev, kfd2kgd);
 }
 
+/**
+ * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to
+ *                                setup amdkfd
+ *
+ * @adev: amdgpu_device pointer
+ * @aperture_base: output returning doorbell aperture base physical address
+ * @aperture_size: output returning doorbell aperture size in bytes
+ * @start_offset: output returning # of doorbell bytes reserved for amdgpu.
+ *
+ * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up,
+ * takes doorbells required for its own rings and reports the setup to amdkfd.
+ * amdgpu reserved doorbells are at the start of the doorbell aperture.
+ */
+static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
+					 phys_addr_t *aperture_base,
+					 size_t *aperture_size,
+					 size_t *start_offset)
+{
+	/*
+	 * The first num_doorbells are used by amdgpu.
+	 * amdkfd takes whatever's left in the aperture.
+	 */
+	if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) {
+		*aperture_base = adev->doorbell.base;
+		*aperture_size = adev->doorbell.size;
+		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
+	} else {
+		*aperture_base = 0;
+		*aperture_size = 0;
+		*start_offset = 0;
+	}
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -242,14 +275,34 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 	kfree(mem);
 }
 
-uint64_t get_vmem_size(struct kgd_dev *kgd)
+void get_local_mem_info(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info)
 {
-	struct amdgpu_device *adev =
-		(struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
+						      ~((1ULL << 32) - 1);
+	resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;
+
+	memset(mem_info, 0, sizeof(*mem_info));
+	if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
+		mem_info->local_mem_size_public = adev->mc.visible_vram_size;
+		mem_info->local_mem_size_private = adev->mc.real_vram_size -
+				adev->mc.visible_vram_size;
+	} else {
+		mem_info->local_mem_size_public = 0;
+		mem_info->local_mem_size_private = adev->mc.real_vram_size;
+	}
+	mem_info->vram_width = adev->mc.vram_width;
 
-	BUG_ON(kgd == NULL);
+	pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
+			&adev->mc.aper_base, &aper_limit,
+			mem_info->local_mem_size_public,
+			mem_info->local_mem_size_private);
 
-	return adev->mc.real_vram_size;
+	if (amdgpu_sriov_vf(adev))
+		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
+	else
+		mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
 }
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
@@ -265,6 +318,39 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	/* The sclk is in quantas of 10kHz */
-	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	/* the sclk is in quantas of 10kHz */
+	if (amdgpu_sriov_vf(adev))
+		return adev->clock.default_sclk / 100;
+
+	return amdgpu_dpm_get_sclk(adev, false) / 100;
+}
+
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
+
+	memset(cu_info, 0, sizeof(*cu_info));
+	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
+		return;
+
+	cu_info->cu_active_number = acu_info.number;
+	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
+	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
+	       sizeof(acu_info.bitmap));
+	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+	cu_info->simd_per_cu = acu_info.simd_per_cu;
+	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
+	cu_info->wave_front_size = acu_info.wave_front_size;
+	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
+	cu_info->lds_size = acu_info.lds_size;
+}
+
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 8d689ab7e429..2a519f9062ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -56,10 +56,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr);
 void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
 
-uint64_t get_vmem_size(struct kgd_dev *kgd);
+void get_local_mem_info(struct kgd_dev *kgd,
+			struct kfd_local_mem_info *mem_info);
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 
 #define read_user_wptr(mmptr, wptr, dst)				\
 	({								\
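The new hqd_dump/hqd_sdma_dump callbacks added to this interface (and implemented in the gfx_v7/gfx_v8 files below) return a freshly kmalloc()ed array of {register, value} pairs through the uint32_t (**dump)[2] out-parameter; the DUMP_REG() macro stores each register offset as a byte address (dword offset << 2). A sketch of how a caller on the amdkfd side might consume the result; apart from the callback itself, the surrounding variables are illustrative:

/* Hypothetical consumer of the new hqd_dump interface. */
uint32_t (*dump)[2];   /* receives HQD_N_REGS pairs of {byte offset, value} */
uint32_t n_regs, i;
int r;

r = kfd2kgd->hqd_dump(kgd, pipe_id, queue_id, &dump, &n_regs);
if (r)
	return r;

for (i = 0; i < n_regs; i++)
	pr_debug("reg 0x%05x = 0x%08x\n", dump[i][0], dump[i][1]);

kfree(dump);   /* the callback allocates the array; the caller frees it */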
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1e3e9be7d77e..a9e6aea0e5f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -105,7 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
 
@@ -166,17 +173,19 @@ static int get_tile_config(struct kgd_dev *kgd,
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
-	.get_vmem_size = get_vmem_size,
+	.get_local_mem_info = get_local_mem_info,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-	.alloc_pasid = amdgpu_vm_alloc_pasid,
-	.free_pasid = amdgpu_vm_free_pasid,
+	.alloc_pasid = amdgpu_pasid_alloc,
+	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
 	.hqd_is_occupied = kgd_hqd_is_occupied,
 	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 	.hqd_destroy = kgd_hqd_destroy,
@@ -191,6 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -375,7 +386,44 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS (35+4)
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_sdma_rlc_registers *m;
@@ -410,10 +458,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
 	}
 
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
-			m->sdma_rlc_doorbell);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
+	data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
+
+	if (read_user_wptr(mm, wptr, data))
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+	else
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdma_rlc_rb_rptr);
+
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
 			m->sdma_rlc_virtual_addr);
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
@@ -423,8 +478,37 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 			m->sdma_rlc_rb_rptr_addr_lo);
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
 			m->sdma_rlc_rb_rptr_addr_hi);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-			m->sdma_rlc_rb_cntl);
+
+	data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+		queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
 
 	return 0;
 }
@@ -575,7 +659,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 	struct cik_sdma_rlc_registers *m;
 	uint32_t sdma_base_addr;
 	uint32_t temp;
-	int timeout = utimeout;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
 	sdma_base_addr = get_sdma_base_addr(m);
@@ -588,10 +672,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
 		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
 			break;
-		if (timeout <= 0)
+		if (time_after(jiffies, end_jiffies))
 			return -ETIME;
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
@@ -599,6 +682,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
 		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
 
+	m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+
 	return 0;
 }
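The kgd_hqd_sdma_destroy() conversion above replaces a decrementing msleep(20) loop with the kernel's wall-clock polling idiom: compute an end time in jiffies up front, test it with time_after(), and sleep in short usleep_range() intervals. (utimeout * HZ / 1000) is the open-coded equivalent of msecs_to_jiffies(utimeout), which the gfx_v8 hunks below use directly. The pattern in isolation, as a sketch with utimeout in milliseconds:

unsigned long end_jiffies = msecs_to_jiffies(utimeout) + jiffies;
uint32_t temp;

while (true) {
	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
	if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
		break;				/* engine has drained */
	if (time_after(jiffies, end_jiffies))
		return -ETIME;			/* wall-clock deadline passed */
	usleep_range(500, 1000);		/* sleep, don't burn 20 ms steps */
}

Unlike the old counter, the jiffies deadline stays accurate even when the loop body or the sleep takes longer than expected.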
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 056929b8ccd0..b127259d7d85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -45,7 +45,7 @@ enum hqd_dequeue_request_type {
 	RESET_WAVES
 };
 
-struct cik_sdma_rlc_registers;
+struct vi_sdma_mqd;
 
 /*
  * Register access functions
 */
@@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
 			uint32_t wptr_shift, uint32_t wptr_mask,
 			struct mm_struct *mm);
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -125,17 +132,19 @@ static int get_tile_config(struct kgd_dev *kgd,
 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
 	.free_gtt_mem = free_gtt_mem,
-	.get_vmem_size = get_vmem_size,
+	.get_local_mem_info = get_local_mem_info,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-	.alloc_pasid = amdgpu_vm_alloc_pasid,
-	.free_pasid = amdgpu_vm_free_pasid,
+	.alloc_pasid = amdgpu_pasid_alloc,
+	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
 	.hqd_is_occupied = kgd_hqd_is_occupied,
 	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
 	.hqd_destroy = kgd_hqd_destroy,
@@ -152,6 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_fw_version = get_fw_version,
 	.set_scratch_backing_va = set_scratch_backing_va,
 	.get_tile_config = get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -268,9 +279,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	return 0;
 }
 
-static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
+static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
 {
-	return 0;
+	uint32_t retval;
+
+	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
+		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+	pr_debug("kfd: sdma base address: 0x%x\n", retval);
+
+	return retval;
 }
 
 static inline struct vi_mqd *get_mqd(void *mqd)
@@ -278,9 +295,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
 	return (struct vi_mqd *)mqd;
 }
 
-static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
 {
-	return (struct cik_sdma_rlc_registers *)mqd;
+	return (struct vi_sdma_mqd *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -358,8 +375,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	return 0;
 }
 
-static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
 {
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS (54+4)
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
+	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct vi_sdma_mqd *m;
+	unsigned long end_jiffies;
+	uint32_t sdma_base_addr;
+	uint32_t data;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(m);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+	end_jiffies = msecs_to_jiffies(2000) + jiffies;
+	while (true) {
+		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+	if (m->sdma_engine_id) {
+		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
+		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
+				RESUME_CTX, 0);
+		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
+	} else {
+		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
+		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+				RESUME_CTX, 0);
+		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
+	}
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+
+	if (read_user_wptr(mm, wptr, data))
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
+	else
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdmax_rlcx_rb_rptr);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
+			m->sdmax_rlcx_virtual_addr);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+			m->sdmax_rlcx_rb_base_hi);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+			m->sdmax_rlcx_rb_rptr_addr_lo);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+			m->sdmax_rlcx_rb_rptr_addr_hi);
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
+		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+4+2+3+7)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
+	     reg++)
+		DUMP_REG(sdma_offset + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
 
 	return 0;
 }
@@ -388,7 +535,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct cik_sdma_rlc_registers *m;
+	struct vi_sdma_mqd *m;
 	uint32_t sdma_base_addr;
 	uint32_t sdma_rlc_rb_cntl;
 
@@ -509,10 +656,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 				unsigned int utimeout)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct cik_sdma_rlc_registers *m;
+	struct vi_sdma_mqd *m;
 	uint32_t sdma_base_addr;
 	uint32_t temp;
-	int timeout = utimeout;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
 
 	m = get_sdma_mqd(mqd);
 	sdma_base_addr = get_sdma_base_addr(m);
@@ -523,18 +670,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
 
 	while (true) {
 		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
 			break;
-		if (timeout <= 0)
+		if (time_after(jiffies, end_jiffies))
 			return -ETIME;
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
-	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index f450b69323fa..bf872f694f50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -27,6 +27,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
+#include "amdgpu_atomfirmware.h"
 #include "amdgpu_i2c.h"
 #include "atom.h"
 
@@ -690,12 +691,12 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 			le32_to_cpu(firmware_info->info_21.ulDefaultDispEngineClkFreq);
 		/* set a reasonable default for DP */
 		if (adev->clock.default_dispclk < 53900) {
-			DRM_INFO("Changing default dispclk from %dMhz to 600Mhz\n",
-				 adev->clock.default_dispclk / 100);
+			DRM_DEBUG("Changing default dispclk from %dMhz to 600Mhz\n",
+				  adev->clock.default_dispclk / 100);
 			adev->clock.default_dispclk = 60000;
 		} else if (adev->clock.default_dispclk <= 60000) {
-			DRM_INFO("Changing default dispclk from %dMhz to 625Mhz\n",
-				 adev->clock.default_dispclk / 100);
+			DRM_DEBUG("Changing default dispclk from %dMhz to 625Mhz\n",
+				  adev->clock.default_dispclk / 100);
 			adev->clock.default_dispclk = 62500;
 		}
 		adev->clock.dp_extclk =
@@ -1699,7 +1700,7 @@ void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
 	WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
 }
 
-void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
+static void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
 {
 	uint32_t bios_2_scratch, bios_6_scratch;
 
@@ -1721,28 +1722,6 @@ void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev)
 	WREG32(adev->bios_scratch_reg_offset + 6, bios_6_scratch);
 }
 
-void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev)
-{
-	int i;
-
-	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-		adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i);
-}
-
-void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev)
-{
-	int i;
-
-	/*
-	 * VBIOS will check ASIC_INIT_COMPLETE bit to decide if
-	 * execute ASIC_Init posting via driver
-	 */
-	adev->bios_scratch[7] &= ~ATOM_S7_ASIC_INIT_COMPLETE_MASK;
-
-	for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++)
-		WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]);
-}
-
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
 					      bool hung)
 {
@@ -1798,7 +1777,7 @@ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
 #endif
 }
 
-int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
+static int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 {
 	struct atom_context *ctx = adev->mode_info.atom_context;
 	int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware);
@@ -1841,3 +1820,234 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev)
 	ctx->scratch_size_bytes = usage_bytes;
 	return 0;
 }
+
+/* ATOM accessor methods */
+/*
+ * ATOM is an interpreted byte code stored in tables in the vbios.  The
+ * driver registers callbacks to access registers and the interpreter
+ * in the driver parses the tables and executes then to program specific
+ * actions (set display modes, asic init, etc.).  See amdgpu_atombios.c,
+ * atombios.h, and atom.c
+ */
+
+/**
+ * cail_pll_read - read PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the PLL register.
+ */
+static uint32_t cail_pll_read(struct card_info *info, uint32_t reg)
+{
+	return 0;
+}
+
+/**
+ * cail_pll_write - write PLL register
+ *
+ * @info: atom card_info pointer
+ * @reg: PLL register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a PLL register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_mc_read - read MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ *
+ * Provides an MC register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MC register.
+ */
+static uint32_t cail_mc_read(struct card_info *info, uint32_t reg)
+{
+	return 0;
+}
+
+/**
+ * cail_mc_write - write MC (Memory Controller) register
+ *
+ * @info: atom card_info pointer
+ * @reg: MC register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MC register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+
+}
+
+/**
+ * cail_reg_write - write MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a MMIO register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+
+	WREG32(reg, val);
+}
+
+/**
+ * cail_reg_read - read MMIO register
+ *
+ * @info: atom card_info pointer
+ * @reg: MMIO register offset
+ *
+ * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MMIO register.
+ */
+static uint32_t cail_reg_read(struct card_info *info, uint32_t reg)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+	uint32_t r;
+
+	r = RREG32(reg);
+	return r;
+}
+
+/**
+ * cail_ioreg_write - write IO register
+ *
+ * @info: atom card_info pointer
+ * @reg: IO register offset
+ * @val: value to write to the pll register
+ *
+ * Provides a IO register accessor for the atom interpreter (r4xx+).
+ */
+static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+
+	WREG32_IO(reg, val);
+}
+
+/**
+ * cail_ioreg_read - read IO register
+ *
+ * @info: atom card_info pointer
+ * @reg: IO register offset
+ *
+ * Provides an IO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the IO register.
+ */
+static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
+{
+	struct amdgpu_device *adev = info->dev->dev_private;
+	uint32_t r;
+
+	r = RREG32_IO(reg);
+	return r;
+}
+
+static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
+						 struct device_attribute *attr,
+						 char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	struct atom_context *ctx = adev->mode_info.atom_context;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version);
+}
+
+static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
+		   NULL);
+
+/**
+ * amdgpu_atombios_fini - free the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Frees the driver info and register access callbacks for the ATOM
+ * interpreter (r4xx+).
+ * Called at driver shutdown.
+ */
+void amdgpu_atombios_fini(struct amdgpu_device *adev)
+{
+	if (adev->mode_info.atom_context) {
+		kfree(adev->mode_info.atom_context->scratch);
+		kfree(adev->mode_info.atom_context->iio);
+	}
+	kfree(adev->mode_info.atom_context);
+	adev->mode_info.atom_context = NULL;
+	kfree(adev->mode_info.atom_card_info);
+	adev->mode_info.atom_card_info = NULL;
+	device_remove_file(adev->dev, &dev_attr_vbios_version);
+}
+
+/**
+ * amdgpu_atombios_init - init the driver info and callbacks for atombios
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initializes the driver info and register access callbacks for the
+ * ATOM interpreter (r4xx+).
+ * Returns 0 on sucess, -ENOMEM on failure.
+ * Called at driver startup.
+ */
+int amdgpu_atombios_init(struct amdgpu_device *adev)
+{
+	struct card_info *atom_card_info =
+	    kzalloc(sizeof(struct card_info), GFP_KERNEL);
+	int ret;
+
+	if (!atom_card_info)
+		return -ENOMEM;
+
+	adev->mode_info.atom_card_info = atom_card_info;
+	atom_card_info->dev = adev->ddev;
+	atom_card_info->reg_read = cail_reg_read;
+	atom_card_info->reg_write = cail_reg_write;
+	/* needed for iio ops */
+	if (adev->rio_mem) {
+		atom_card_info->ioreg_read = cail_ioreg_read;
+		atom_card_info->ioreg_write = cail_ioreg_write;
+	} else {
+		DRM_DEBUG("PCI I/O BAR is not found. Using MMIO to access ATOM BIOS\n");
+		atom_card_info->ioreg_read = cail_reg_read;
+		atom_card_info->ioreg_write = cail_reg_write;
+	}
+	atom_card_info->mc_read = cail_mc_read;
+	atom_card_info->mc_write = cail_mc_write;
+	atom_card_info->pll_read = cail_pll_read;
+	atom_card_info->pll_write = cail_pll_write;
+
+	adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios);
+	if (!adev->mode_info.atom_context) {
+		amdgpu_atombios_fini(adev);
+		return -ENOMEM;
+	}
+
+	mutex_init(&adev->mode_info.atom_context->mutex);
+	if (adev->is_atom_fw) {
+		amdgpu_atomfirmware_scratch_regs_init(adev);
+		amdgpu_atomfirmware_allocate_fb_scratch(adev);
+	} else {
+		amdgpu_atombios_scratch_regs_init(adev);
+		amdgpu_atombios_allocate_fb_scratch(adev);
+	}
+
+	ret = device_create_file(adev->dev, &dev_attr_vbios_version);
+	if (ret) {
+		DRM_ERROR("Failed to create device file for VBIOS version\n");
+		return ret;
+	}
+
+	return 0;
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index b0d5d1d7fdba..fd8f18074f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -195,9 +195,6 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
 bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
 
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
-void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
-void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
-void amdgpu_atombios_scratch_regs_restore(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
 					      bool hung);
 bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
@@ -219,6 +216,7 @@ int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev,
 				  u8 voltage_type,
 				  u8 *svd_gpio_id, u8 *svc_gpio_id);
 
-int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev);
+void amdgpu_atombios_fini(struct amdgpu_device *adev);
+int amdgpu_atombios_init(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index c13c51af0b68..e2c3c5ec42d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -14,6 +14,16 @@
 
 #include "amd_acpi.h"
 
+#define AMDGPU_PX_QUIRK_FORCE_ATPX  (1 << 0)
+
+struct amdgpu_px_quirk {
+	u32 chip_vendor;
+	u32 chip_device;
+	u32 subsys_vendor;
+	u32 subsys_device;
+	u32 px_quirk_flags;
+};
+
 struct amdgpu_atpx_functions {
 	bool px_params;
 	bool power_cntl;
@@ -35,6 +45,7 @@ struct amdgpu_atpx {
 static struct amdgpu_atpx_priv {
 	bool atpx_detected;
 	bool bridge_pm_usable;
+	unsigned int quirks;
 	/* handle for device - and atpx */
 	acpi_handle dhandle;
 	acpi_handle other_handle;
@@ -205,13 +216,19 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 
 	atpx->is_hybrid = false;
 	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
-		printk("ATPX Hybrid Graphics\n");
-		/*
-		 * Disable legacy PM methods only when pcie port PM is usable,
-		 * otherwise the device might fail to power off or power on.
-		 */
-		atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
-		atpx->is_hybrid = true;
+		if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) {
+			printk("ATPX Hybrid Graphics, forcing to ATPX\n");
+			atpx->functions.power_cntl = true;
+			atpx->is_hybrid = false;
+		} else {
+			printk("ATPX Hybrid Graphics\n");
+			/*
+			 * Disable legacy PM methods only when pcie port PM is usable,
+			 * otherwise the device might fail to power off or power on.
+			 */
+			atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
+			atpx->is_hybrid = true;
+		}
 	}
 
 	atpx->dgpu_req_power_for_displays = false;
@@ -547,6 +564,30 @@ static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
 	.get_client_id = amdgpu_atpx_get_client_id,
 };
 
+static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
+	/* HG _PR3 doesn't seem to work on this A+A weston board */
+	{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0, 0, 0, 0, 0 },
+};
+
+static void amdgpu_atpx_get_quirks(struct pci_dev *pdev)
+{
+	const struct amdgpu_px_quirk *p = amdgpu_px_quirk_list;
+
+	/* Apply PX quirks */
+	while (p && p->chip_device != 0) {
+		if (pdev->vendor == p->chip_vendor &&
+		    pdev->device == p->chip_device &&
+		    pdev->subsystem_vendor == p->subsys_vendor &&
+		    pdev->subsystem_device == p->subsys_device) {
+			amdgpu_atpx_priv.quirks |= p->px_quirk_flags;
+			break;
+		}
+		++p;
+	}
+}
+
 /**
  * amdgpu_atpx_detect - detect whether we have PX
  *
@@ -570,6 +611,7 @@ static bool amdgpu_atpx_detect(void)
 
 		parent_pdev = pci_upstream_bridge(pdev);
 		d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+		amdgpu_atpx_get_quirks(pdev);
 	}
 	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
@@ -579,6 +621,7 @@ static bool amdgpu_atpx_detect(void)
 
 		parent_pdev = pci_upstream_bridge(pdev);
 		d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+		amdgpu_atpx_get_quirks(pdev);
 	}
 	if (has_atpx && vga_count == 2) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 057e1ecd83ce..a5df80d50d44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -93,7 +93,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
 	resource_size_t size = 256 * 1024; /* ??? */
*/ if (!(adev->flags & AMD_IS_APU)) - if (amdgpu_need_post(adev)) + if (amdgpu_device_need_post(adev)) return false; adev->bios = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index f2b72c7c6857..4466f3535e2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -801,6 +801,11 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, else strcpy(fw_name, "amdgpu/vega10_smc.bin"); break; + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_RAVEN: + adev->pm.fw_version = info->version; + return 0; default: DRM_ERROR("SMC firmware not supported\n"); return -EINVAL; @@ -948,7 +953,6 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, (amdgpu_crtc->v_border * 2); mode_info->vblank_time_us = vblank_lines * line_time_us; mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); - mode_info->ref_clock = adev->clock.spll.reference_freq; mode_info = NULL; } } @@ -958,7 +962,6 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, if (mode_info != NULL) { mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time; mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh; - mode_info->ref_clock = adev->clock.spll.reference_freq; } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index df9cbc78e168..8ca3783f2deb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -358,7 +358,6 @@ static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) if (amdgpu_connector->edid) { drm_mode_connector_update_edid_property(connector, amdgpu_connector->edid); ret = drm_add_edid_modes(connector, amdgpu_connector->edid); - drm_edid_to_eld(connector, amdgpu_connector->edid); return ret; } drm_mode_connector_update_edid_property(connector, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 57abf7abd7a9..e80fc38141b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -90,6 +90,12 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_chunk; } + /* skip guilty context job */ + if (atomic_read(&p->ctx->guilty) == 1) { + ret = -ECANCELED; + goto free_chunk; + } + mutex_lock(&p->ctx->lock); /* get chunks */ @@ -337,7 +343,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - u64 initial_bytes_moved, bytes_moved; + struct ttm_operation_ctx ctx = { + .interruptible = true, + .no_wait_gpu = false, + .allow_reserved_eviction = false, + .resv = bo->tbo.resv + }; uint32_t domain; int r; @@ -367,15 +378,13 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, retry: amdgpu_ttm_placement_from_domain(bo, domain); - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - bytes_moved = atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; - p->bytes_moved += bytes_moved; + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + + p->bytes_moved += ctx.bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) - p->bytes_moved_vis += bytes_moved; + p->bytes_moved_vis += 
ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; @@ -390,6 +399,7 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, struct amdgpu_bo *validated) { uint32_t domain = validated->allowed_domains; + struct ttm_operation_ctx ctx = { true, false }; int r; if (!p->evictable) @@ -431,7 +441,7 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, bo->tbo.mem.mem_type == TTM_PL_VRAM && bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); bytes_moved = atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved; p->bytes_moved += bytes_moved; @@ -470,6 +480,7 @@ static int amdgpu_cs_validate(void *param, struct amdgpu_bo *bo) static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, struct list_head *validated) { + struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_bo_list_entry *lobj; int r; @@ -487,8 +498,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, lobj->user_pages) { amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, - false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, @@ -678,7 +688,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (!r && p->uf_entry.robj) { struct amdgpu_bo *uf = p->uf_entry.robj; - r = amdgpu_ttm_bind(&uf->tbo, &uf->tbo.mem); + r = amdgpu_ttm_alloc_gart(&uf->tbo); p->job->uf_addr += amdgpu_bo_gpu_offset(uf); } @@ -768,10 +778,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) struct amdgpu_bo *bo; int i, r; - r = amdgpu_vm_update_directories(adev, vm); - if (r) - return r; - r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; @@ -781,7 +787,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) return r; r = amdgpu_sync_fence(adev, &p->job->sync, - fpriv->prt_va->last_pt_update); + fpriv->prt_va->last_pt_update, false); if (r) return r; @@ -795,7 +801,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) return r; f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f); + r = amdgpu_sync_fence(adev, &p->job->sync, f, false); if (r) return r; } @@ -818,7 +824,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) return r; f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->job->sync, f); + r = amdgpu_sync_fence(adev, &p->job->sync, f, false); if (r) return r; } @@ -829,7 +835,11 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update); + r = amdgpu_vm_update_directories(adev, vm); + if (r) + return r; + + r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update, false); if (r) return r; @@ -865,8 +875,8 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *m; struct amdgpu_bo *aobj = NULL; struct amdgpu_cs_chunk *chunk; + uint64_t offset, va_start; struct amdgpu_ib *ib; - uint64_t offset; uint8_t *kptr; chunk = &p->chunks[i]; @@ -876,14 +886,14 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) continue; - r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, - &aobj, &m); + va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK; + r = 
amdgpu_cs_find_mapping(p, va_start, &aobj, &m); if (r) { DRM_ERROR("IB va_start is invalid\n"); return r; } - if ((chunk_ib->va_start + chunk_ib->ib_bytes) > + if ((va_start + chunk_ib->ib_bytes) > (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("IB va_start+ib_bytes is invalid\n"); return -EINVAL; @@ -896,7 +906,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, } offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += chunk_ib->va_start - offset; + kptr += va_start - offset; memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); @@ -1033,8 +1043,8 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, amdgpu_ctx_put(ctx); return r; } else if (fence) { - r = amdgpu_sync_fence(p->adev, &p->job->sync, - fence); + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, + true); dma_fence_put(fence); amdgpu_ctx_put(ctx); if (r) @@ -1053,7 +1063,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, if (r) return r; - r = amdgpu_sync_fence(p->adev, &p->job->sync, fence); + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true); dma_fence_put(fence); return r; @@ -1145,7 +1155,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_ring *ring = p->job->ring; - struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity; struct amdgpu_job *job; unsigned i; uint64_t seq; @@ -1168,7 +1178,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job = p->job; p->job = NULL; - r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp); + r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp); if (r) { amdgpu_job_free(job); amdgpu_mn_unlock(p->mn); @@ -1194,11 +1204,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, - amd_sched_get_job_priority(&job->base)); + amdgpu_ring_priority_get(job->ring, job->base.s_priority); trace_amdgpu_cs_ioctl(job); - amd_sched_entity_push_job(&job->base); + drm_sched_entity_push_job(&job->base, entity); ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); amdgpu_mn_unlock(p->mn); @@ -1570,6 +1579,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, struct amdgpu_bo_va_mapping **map) { struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; int r; @@ -1590,11 +1600,10 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, - false); + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); if (r) return r; } - return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); + return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c184468e2b2b..09d35051fdd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,10 +28,10 @@ #include "amdgpu_sched.h" static int amdgpu_ctx_priority_permit(struct drm_file *filp, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { /* NORMAL and below are accessible by everyone */ - if (priority 
<= AMD_SCHED_PRIORITY_NORMAL) + if (priority <= DRM_SCHED_PRIORITY_NORMAL) return 0; if (capable(CAP_SYS_NICE)) @@ -44,14 +44,14 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp, } static int amdgpu_ctx_init(struct amdgpu_device *adev, - enum amd_sched_priority priority, + enum drm_sched_priority priority, struct drm_file *filp, struct amdgpu_ctx *ctx) { unsigned i, j; int r; - if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) return -EINVAL; r = amdgpu_ctx_priority_permit(filp, priority); @@ -75,22 +75,23 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, } ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->init_priority = priority; - ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; + ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i]; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; rq = &ring->sched.sched_rq[priority]; if (ring == &adev->gfx.kiq.ring) continue; - r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, - rq, amdgpu_sched_jobs); + r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity, + rq, amdgpu_sched_jobs, &ctx->guilty); if (r) goto failed; } @@ -103,7 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, failed: for (j = 0; j < i; j++) - amd_sched_entity_fini(&adev->rings[j]->sched, + drm_sched_entity_fini(&adev->rings[j]->sched, &ctx->rings[j].entity); kfree(ctx->fences); ctx->fences = NULL; @@ -125,7 +126,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) ctx->fences = NULL; for (i = 0; i < adev->num_rings; i++) - amd_sched_entity_fini(&adev->rings[i]->sched, + drm_sched_entity_fini(&adev->rings[i]->sched, &ctx->rings[i].entity); amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); @@ -136,7 +137,7 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, struct drm_file *filp, - enum amd_sched_priority priority, + enum drm_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; @@ -216,11 +217,45 @@ static int amdgpu_ctx_query(struct amdgpu_device *adev, /* determine if a GPU reset has occured since the last call */ reset_counter = atomic_read(&adev->gpu_reset_counter); /* TODO: this should ideally return NO, GUILTY, or INNOCENT. 
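The change repeated throughout amdgpu_cs.c above is mechanical: the interruptible/no_wait_gpu booleans formerly passed to ttm_bo_validate() move into a struct ttm_operation_ctx, and the context also carries per-call eviction accounting, so callers read ctx.bytes_moved instead of sampling adev->num_bytes_moved before and after the call. A freestanding sketch of the calling pattern (stand-in types and a stub validate(), not the real TTM API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* same shape as the TTM struct used above (illustrative subset) */
struct ttm_operation_ctx {
	bool interruptible;
	bool no_wait_gpu;
	uint64_t bytes_moved;	/* filled in by the validate call */
};

/* stand-in for ttm_bo_validate(): takes the ctx instead of two bools */
static int validate(struct ttm_operation_ctx *ctx)
{
	ctx->bytes_moved += 4096;	/* pretend one page was moved */
	return 0;
}

int main(void)
{
	struct ttm_operation_ctx ctx = { true, false, 0 };
	uint64_t total = 0;

	if (validate(&ctx) == 0)
		total += ctx.bytes_moved;	/* replaces the old
						 * before/after sampling of
						 * a global atomic counter */
	printf("%llu\n", (unsigned long long)total);
	return 0;
}

Bundling the flags into one struct also lets later patches add fields (such as resv and allow_reserved_eviction, both visible in amdgpu_cs_bo_validate() above) without touching every caller again.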
*/ - if (ctx->reset_counter == reset_counter) + if (ctx->reset_counter_query == reset_counter) out->state.reset_status = AMDGPU_CTX_NO_RESET; else out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET; - ctx->reset_counter = reset_counter; + ctx->reset_counter_query = reset_counter; + + mutex_unlock(&mgr->lock); + return 0; +} + +static int amdgpu_ctx_query2(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, uint32_t id, + union drm_amdgpu_ctx_out *out) +{ + struct amdgpu_ctx *ctx; + struct amdgpu_ctx_mgr *mgr; + + if (!fpriv) + return -EINVAL; + + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + ctx = idr_find(&mgr->ctx_handles, id); + if (!ctx) { + mutex_unlock(&mgr->lock); + return -EINVAL; + } + + out->state.flags = 0x0; + out->state.hangs = 0x0; + + if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET; + + if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST; + + if (atomic_read(&ctx->guilty)) + out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; mutex_unlock(&mgr->lock); return 0; @@ -231,7 +266,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, { int r; uint32_t id; - enum amd_sched_priority priority; + enum drm_sched_priority priority; union drm_amdgpu_ctx *args = data; struct amdgpu_device *adev = dev->dev_private; @@ -243,8 +278,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, /* For backwards compatibility reasons, we need to accept * ioctls with garbage in the priority field */ - if (priority == AMD_SCHED_PRIORITY_INVALID) - priority = AMD_SCHED_PRIORITY_NORMAL; + if (priority == DRM_SCHED_PRIORITY_INVALID) + priority = DRM_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: @@ -257,6 +292,9 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, case AMDGPU_CTX_OP_QUERY_STATE: r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; + case AMDGPU_CTX_OP_QUERY_STATE2: + r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); + break; default: return -EINVAL; } @@ -347,18 +385,18 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, } void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { int i; struct amdgpu_device *adev = ctx->adev; - struct amd_sched_rq *rq; - struct amd_sched_entity *entity; + struct drm_sched_rq *rq; + struct drm_sched_entity *entity; struct amdgpu_ring *ring; - enum amd_sched_priority ctx_prio; + enum drm_sched_priority ctx_prio; ctx->override_priority = priority; - ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ? + ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; for (i = 0; i < adev->num_rings; i++) { @@ -369,7 +407,7 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) continue; - amd_sched_entity_set_rq(entity, rq); + drm_sched_entity_set_rq(entity, rq); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c new file mode 100644 index 000000000000..ee76b468774a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -0,0 +1,792 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. 
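The new AMDGPU_CTX_OP_QUERY_STATE2 gives userspace a flags word instead of the old single reset status, distinguishing "a reset happened" from "VRAM was lost" from "this context was guilty". A hedged userspace sketch (assumes libdrm's amdgpu_drm.h carrying the QUERY_STATE2 constants from this series, and a render node at the usual path):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <libdrm/amdgpu_drm.h>	/* union drm_amdgpu_ctx, AMDGPU_CTX_OP_* */

int main(void)
{
	union drm_amdgpu_ctx args;
	uint32_t ctx_id;
	int fd = open("/dev/dri/renderD128", O_RDWR);	/* path may differ */

	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
	if (ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args))
		return 1;
	ctx_id = args.out.alloc.ctx_id;	/* save before reusing the union */

	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
	args.in.ctx_id = ctx_id;
	if (ioctl(fd, DRM_IOCTL_AMDGPU_CTX, &args) == 0) {
		if (args.out.state.flags & AMDGPU_CTX_QUERY2_FLAGS_RESET)
			puts("a GPU reset occurred since context creation");
		if (args.out.state.flags & AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST)
			puts("VRAM contents may have been lost");
		if (args.out.state.flags & AMDGPU_CTX_QUERY2_FLAGS_GUILTY)
			puts("this context caused a hang");
	}
	return 0;
}

Note the asymmetry visible in the kernel code: the old query advances reset_counter_query on each call, while query2 compares against the counter captured at context creation, so its RESET flag stays sticky for the life of the context.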
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/kthread.h> +#include <drm/drmP.h> +#include <linux/debugfs.h> +#include "amdgpu.h" + +/* + * Debugfs + */ +int amdgpu_debugfs_add_files(struct amdgpu_device *adev, + const struct drm_info_list *files, + unsigned nfiles) +{ + unsigned i; + + for (i = 0; i < adev->debugfs_count; i++) { + if (adev->debugfs[i].files == files) { + /* Already registered */ + return 0; + } + } + + i = adev->debugfs_count + 1; + if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) { + DRM_ERROR("Reached maximum number of debugfs components.\n"); + DRM_ERROR("Report so we increase " + "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n"); + return -EINVAL; + } + adev->debugfs[adev->debugfs_count].files = files; + adev->debugfs[adev->debugfs_count].num_files = nfiles; + adev->debugfs_count = i; +#if defined(CONFIG_DEBUG_FS) + drm_debugfs_create_files(files, nfiles, + adev->ddev->primary->debugfs_root, + adev->ddev->primary); +#endif + return 0; +} + +#if defined(CONFIG_DEBUG_FS) + +static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + bool pm_pg_lock, use_bank; + unsigned instance_bank, sh_bank, se_bank; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + /* are we reading registers for which a PG lock is necessary? 
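As the decode just below shows, the amdgpu_regs file packs more than a register offset into the file position: the low 22 bits are the register byte offset, bit 23 requests the PM mutex, bits 24-33, 34-43 and 44-53 select the SE, SH and instance banks (0x3FF means broadcast), and bit 62 enables banking at all. A sketch of composing such an offset from userspace (the debugfs path and the register offset are illustrative):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* compose a file offset for amdgpu_regs as decoded by
 * amdgpu_debugfs_regs_read(): low 22 bits register byte offset,
 * SE/SH/instance bank selects above that, bit 62 enables banking */
static uint64_t regs_offset(uint32_t byte_off, unsigned se, unsigned sh,
			    unsigned instance)
{
	return (uint64_t)(byte_off & ((1u << 22) - 1)) |
	       ((uint64_t)(se & 0x3FF) << 24) |
	       ((uint64_t)(sh & 0x3FF) << 34) |
	       ((uint64_t)(instance & 0x3FF) << 44) |
	       (1ULL << 62);
}

int main(void)
{
	/* assumes a mounted debugfs and DRM minor 0 */
	int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs", O_RDONLY);
	uint32_t val;

	if (fd < 0)
		return 1;
	/* read one (hypothetical) register on SE0/SH0, instance broadcast */
	if (pread(fd, &val, sizeof(val),
		  regs_offset(0x8010, 0, 0, 0x3FF)) == 4)
		printf("0x%08x\n", val);
	close(fd);
	return 0;
}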
*/ + pm_pg_lock = (*pos >> 23) & 1; + + if (*pos & (1ULL << 62)) { + se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; + sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; + instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; + + if (se_bank == 0x3FF) + se_bank = 0xFFFFFFFF; + if (sh_bank == 0x3FF) + sh_bank = 0xFFFFFFFF; + if (instance_bank == 0x3FF) + instance_bank = 0xFFFFFFFF; + use_bank = 1; + } else { + use_bank = 0; + } + + *pos &= (1UL << 22) - 1; + + if (use_bank) { + if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + return -EINVAL; + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se_bank, + sh_bank, instance_bank); + } + + if (pm_pg_lock) + mutex_lock(&adev->pm.mutex); + + while (size) { + uint32_t value; + + if (*pos > adev->rmmio_size) + goto end; + + value = RREG32(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto end; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + +end: + if (use_bank) { + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + + if (pm_pg_lock) + mutex_unlock(&adev->pm.mutex); + + return result; +} + +static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + bool pm_pg_lock, use_bank; + unsigned instance_bank, sh_bank, se_bank; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + /* are we reading registers for which a PG lock is necessary? */ + pm_pg_lock = (*pos >> 23) & 1; + + if (*pos & (1ULL << 62)) { + se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; + sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; + instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; + + if (se_bank == 0x3FF) + se_bank = 0xFFFFFFFF; + if (sh_bank == 0x3FF) + sh_bank = 0xFFFFFFFF; + if (instance_bank == 0x3FF) + instance_bank = 0xFFFFFFFF; + use_bank = 1; + } else { + use_bank = 0; + } + + *pos &= (1UL << 22) - 1; + + if (use_bank) { + if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || + (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) + return -EINVAL; + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se_bank, + sh_bank, instance_bank); + } + + if (pm_pg_lock) + mutex_lock(&adev->pm.mutex); + + while (size) { + uint32_t value; + + if (*pos > adev->rmmio_size) + return result; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + if (use_bank) { + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + } + + if (pm_pg_lock) + mutex_unlock(&adev->pm.mutex); + + return result; +} + +static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_PCIE(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = 
file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_PCIE(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_DIDT(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_DIDT(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_SMC(*pos); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_SMC(*pos, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + uint32_t *config, no_regs = 0; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + config = kmalloc_array(256, sizeof(*config), GFP_KERNEL); + if (!config) + return -ENOMEM; + + /* version, increment each time something is added */ + config[no_regs++] = 3; + config[no_regs++] = adev->gfx.config.max_shader_engines; + config[no_regs++] = adev->gfx.config.max_tile_pipes; + config[no_regs++] = adev->gfx.config.max_cu_per_sh; + config[no_regs++] = adev->gfx.config.max_sh_per_se; + config[no_regs++] = adev->gfx.config.max_backends_per_se; + config[no_regs++] = adev->gfx.config.max_texture_channel_caches; + config[no_regs++] = adev->gfx.config.max_gprs; + config[no_regs++] = adev->gfx.config.max_gs_threads; + config[no_regs++] = adev->gfx.config.max_hw_contexts; + config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend; + config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend; + config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size; + config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size; + config[no_regs++] = adev->gfx.config.num_tile_pipes; + config[no_regs++] = adev->gfx.config.backend_enable_mask; + 
config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes; + config[no_regs++] = adev->gfx.config.mem_row_size_in_kb; + config[no_regs++] = adev->gfx.config.shader_engine_tile_size; + config[no_regs++] = adev->gfx.config.num_gpus; + config[no_regs++] = adev->gfx.config.multi_gpu_tile_size; + config[no_regs++] = adev->gfx.config.mc_arb_ramcfg; + config[no_regs++] = adev->gfx.config.gb_addr_config; + config[no_regs++] = adev->gfx.config.num_rbs; + + /* rev==1 */ + config[no_regs++] = adev->rev_id; + config[no_regs++] = adev->pg_flags; + config[no_regs++] = adev->cg_flags; + + /* rev==2 */ + config[no_regs++] = adev->family; + config[no_regs++] = adev->external_rev_id; + + /* rev==3 */ + config[no_regs++] = adev->pdev->device; + config[no_regs++] = adev->pdev->revision; + config[no_regs++] = adev->pdev->subsystem_device; + config[no_regs++] = adev->pdev->subsystem_vendor; + + while (size && (*pos < no_regs * 4)) { + uint32_t value; + + value = config[*pos >> 2]; + r = put_user(value, (uint32_t *)buf); + if (r) { + kfree(config); + return r; + } + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + kfree(config); + return result; +} + +static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + int idx, x, outsize, r, valuesize; + uint32_t values[16]; + + if (size & 3 || *pos & 0x3) + return -EINVAL; + + if (amdgpu_dpm == 0) + return -EINVAL; + + /* convert offset to sensor number */ + idx = *pos >> 2; + + valuesize = sizeof(values); + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) + r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); + else + return -EINVAL; + + if (size > valuesize) + return -EINVAL; + + outsize = 0; + x = 0; + if (!r) { + while (size) { + r = put_user(values[x++], (int32_t *)buf); + buf += 4; + size -= 4; + outsize += 4; + } + } + + return !r ? 
outsize : r; +} + +static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + int r, x; + ssize_t result=0; + uint32_t offset, se, sh, cu, wave, simd, data[32]; + + if (size & 3 || *pos & 3) + return -EINVAL; + + /* decode offset */ + offset = (*pos & GENMASK_ULL(6, 0)); + se = (*pos & GENMASK_ULL(14, 7)) >> 7; + sh = (*pos & GENMASK_ULL(22, 15)) >> 15; + cu = (*pos & GENMASK_ULL(30, 23)) >> 23; + wave = (*pos & GENMASK_ULL(36, 31)) >> 31; + simd = (*pos & GENMASK_ULL(44, 37)) >> 37; + + /* switch to the specific se/sh/cu */ + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se, sh, cu); + + x = 0; + if (adev->gfx.funcs->read_wave_data) + adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); + + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + mutex_unlock(&adev->grbm_idx_mutex); + + if (!x) + return -EINVAL; + + while (size && (offset < x * 4)) { + uint32_t value; + + value = data[offset >> 2]; + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + offset += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + int r; + ssize_t result = 0; + uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; + + if (size & 3 || *pos & 3) + return -EINVAL; + + /* decode offset */ + offset = *pos & GENMASK_ULL(11, 0); + se = (*pos & GENMASK_ULL(19, 12)) >> 12; + sh = (*pos & GENMASK_ULL(27, 20)) >> 20; + cu = (*pos & GENMASK_ULL(35, 28)) >> 28; + wave = (*pos & GENMASK_ULL(43, 36)) >> 36; + simd = (*pos & GENMASK_ULL(51, 44)) >> 44; + thread = (*pos & GENMASK_ULL(59, 52)) >> 52; + bank = (*pos & GENMASK_ULL(61, 60)) >> 60; + + data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* switch to the specific se/sh/cu */ + mutex_lock(&adev->grbm_idx_mutex); + amdgpu_gfx_select_se_sh(adev, se, sh, cu); + + if (bank == 0) { + if (adev->gfx.funcs->read_wave_vgprs) + adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); + } else { + if (adev->gfx.funcs->read_wave_sgprs) + adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); + } + + amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); + mutex_unlock(&adev->grbm_idx_mutex); + + while (size) { + uint32_t value; + + value = data[offset++]; + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto err; + } + + result += 4; + buf += 4; + size -= 4; + } + +err: + kfree(data); + return result; +} + +static const struct file_operations amdgpu_debugfs_regs_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_read, + .write = amdgpu_debugfs_regs_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_didt_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_didt_read, + .write = amdgpu_debugfs_regs_didt_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_pcie_read, + .write = amdgpu_debugfs_regs_pcie_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_smc_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_smc_read, + .write = amdgpu_debugfs_regs_smc_write, + .llseek = default_llseek +}; + +static const 
struct file_operations amdgpu_debugfs_gca_config_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gca_config_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_sensors_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_sensor_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_wave_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_wave_read, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_gpr_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gpr_read, + .llseek = default_llseek +}; + +static const struct file_operations *debugfs_regs[] = { + &amdgpu_debugfs_regs_fops, + &amdgpu_debugfs_regs_didt_fops, + &amdgpu_debugfs_regs_pcie_fops, + &amdgpu_debugfs_regs_smc_fops, + &amdgpu_debugfs_gca_config_fops, + &amdgpu_debugfs_sensors_fops, + &amdgpu_debugfs_wave_fops, + &amdgpu_debugfs_gpr_fops, +}; + +static const char *debugfs_regs_names[] = { + "amdgpu_regs", + "amdgpu_regs_didt", + "amdgpu_regs_pcie", + "amdgpu_regs_smc", + "amdgpu_gca_config", + "amdgpu_sensors", + "amdgpu_wave", + "amdgpu_gpr", +}; + +int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) +{ + struct drm_minor *minor = adev->ddev->primary; + struct dentry *ent, *root = minor->debugfs_root; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + ent = debugfs_create_file(debugfs_regs_names[i], + S_IFREG | S_IRUGO, root, + adev, debugfs_regs[i]); + if (IS_ERR(ent)) { + for (j = 0; j < i; j++) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + return PTR_ERR(ent); + } + + if (!i) + i_size_write(ent->d_inode, adev->rmmio_size); + adev->debugfs_regs[i] = ent; + } + + return 0; +} + +void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + if (adev->debugfs_regs[i]) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + } +} + +static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + int r = 0, i; + + /* hold on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_park(ring->sched.thread); + } + + seq_printf(m, "run ib test:\n"); + r = amdgpu_ib_ring_tests(adev); + if (r) + seq_printf(m, "ib ring tests failed (%d).\n", r); + else + seq_printf(m, "ib ring tests passed.\n"); + + /* go on the scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_unpark(ring->sched.thread); + } + + return 0; +} + +static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_write(m, adev->bios, adev->bios_size); + return 0; +} + +static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev)); + return 0; +} + +static const struct drm_info_list amdgpu_debugfs_list[] = { 
+ {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump}, + {"amdgpu_test_ib", &amdgpu_debugfs_test_ib}, + {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram} +}; + +int amdgpu_debugfs_init(struct amdgpu_device *adev) +{ + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list, + ARRAY_SIZE(amdgpu_debugfs_list)); +} + +#else +int amdgpu_debugfs_init(struct amdgpu_device *adev) +{ + return 0; +} +int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) +{ + return 0; +} +void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h new file mode 100644 index 000000000000..8260d8073c26 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -0,0 +1,42 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +/* + * Debugfs + */ +struct amdgpu_debugfs { + const struct drm_info_list *files; + unsigned num_files; +}; + +int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); +void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); +int amdgpu_debugfs_init(struct amdgpu_device *adev); +int amdgpu_debugfs_add_files(struct amdgpu_device *adev, + const struct drm_info_list *files, + unsigned nfiles); +int amdgpu_debugfs_fence_init(struct amdgpu_device *adev); +int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); +int amdgpu_debugfs_gem_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3573ecdb06ee..00a50cc5ec9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -28,7 +28,6 @@ #include <linux/kthread.h> #include <linux/console.h> #include <linux/slab.h> -#include <linux/debugfs.h> #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_atomic_helper.h> @@ -64,11 +63,6 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 -static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); -static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); -static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); -static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev); - static const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", @@ -333,7 +327,7 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, BUG(); } -static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) +static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev) { return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, @@ -342,13 +336,13 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) (void **)&adev->vram_scratch.ptr); } -static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) +static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev) { amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL); } /** - * amdgpu_program_register_sequence - program an array of registers. + * amdgpu_device_program_register_sequence - program an array of registers. * * @adev: amdgpu_device pointer * @registers: pointer to the register array @@ -357,9 +351,9 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) * Programs an array or registers with and and or masks. * This is a helper for setting golden registers. */ -void amdgpu_program_register_sequence(struct amdgpu_device *adev, - const u32 *registers, - const u32 array_size) +void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, + const u32 *registers, + const u32 array_size) { u32 tmp, reg, and_mask, or_mask; int i; @@ -383,7 +377,7 @@ void amdgpu_program_register_sequence(struct amdgpu_device *adev, } } -void amdgpu_pci_config_reset(struct amdgpu_device *adev) +void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) { pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA); } @@ -392,14 +386,14 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev) * GPU doorbell aperture helpers function. */ /** - * amdgpu_doorbell_init - Init doorbell driver information. + * amdgpu_device_doorbell_init - Init doorbell driver information. * * @adev: amdgpu_device pointer * * Init doorbell driver information (CIK) * Returns 0 on success, error on failure. 
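amdgpu_device_program_register_sequence() is only renamed here; its body (unchanged, and mostly outside this hunk) walks the array as {reg, and_mask, or_mask} triplets, doing a read-modify-write unless the AND mask is all ones, in which case the read is skipped and the OR mask is written outright. A standalone model of that scheme (a plain array stands in for MMIO):

#include <stdint.h>
#include <stdio.h>

static uint32_t mmio[0x2000];	/* stand-in register file */

/* triplets of { reg, and_mask, or_mask }, the golden-register format */
static const uint32_t golden[] = {
	0x1000, 0x0000ffff, 0x00001234,	/* RMW of the low 16 bits */
	0x1004, 0xffffffff, 0xdeadbeef,	/* full mask: plain write */
};

static void program_sequence(const uint32_t *regs, unsigned n)
{
	unsigned i;

	for (i = 0; i < n; i += 3) {
		uint32_t reg = regs[i];
		uint32_t and_mask = regs[i + 1];
		uint32_t or_mask = regs[i + 2];
		uint32_t tmp;

		if (and_mask == 0xffffffff) {
			tmp = or_mask;	/* whole register, skip the read */
		} else {
			tmp = mmio[reg];
			tmp &= ~and_mask;
			tmp |= or_mask;
		}
		mmio[reg] = tmp;
	}
}

int main(void)
{
	mmio[0x1000] = 0xaaaa5555;
	program_sequence(golden, sizeof(golden) / sizeof(golden[0]));
	printf("0x%08x 0x%08x\n", mmio[0x1000], mmio[0x1004]);
	return 0;
}

The all-ones special case matters for write-only or volatile registers, where a read would be wrong or wasteful.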
*/ -static int amdgpu_doorbell_init(struct amdgpu_device *adev) +static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) { /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { @@ -410,6 +404,9 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) return 0; } + if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) + return -EINVAL; + /* doorbell bar mapping */ adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); @@ -429,66 +426,35 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) } /** - * amdgpu_doorbell_fini - Tear down doorbell driver information. + * amdgpu_device_doorbell_fini - Tear down doorbell driver information. * * @adev: amdgpu_device pointer * * Tear down doorbell driver information (CIK) */ -static void amdgpu_doorbell_fini(struct amdgpu_device *adev) +static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) { iounmap(adev->doorbell.ptr); adev->doorbell.ptr = NULL; } -/** - * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to - * setup amdkfd - * - * @adev: amdgpu_device pointer - * @aperture_base: output returning doorbell aperture base physical address - * @aperture_size: output returning doorbell aperture size in bytes - * @start_offset: output returning # of doorbell bytes reserved for amdgpu. - * - * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up, - * takes doorbells required for its own rings and reports the setup to amdkfd. - * amdgpu reserved doorbells are at the start of the doorbell aperture. - */ -void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, - phys_addr_t *aperture_base, - size_t *aperture_size, - size_t *start_offset) -{ - /* - * The first num_doorbells are used by amdgpu. - * amdkfd takes whatever's left in the aperture. - */ - if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) { - *aperture_base = adev->doorbell.base; - *aperture_size = adev->doorbell.size; - *start_offset = adev->doorbell.num_doorbells * sizeof(u32); - } else { - *aperture_base = 0; - *aperture_size = 0; - *start_offset = 0; - } -} + /* - * amdgpu_wb_*() + * amdgpu_device_wb_*() * Writeback is the method by which the GPU updates special pages in memory * with the status of certain GPU events (fences, ring pointers,etc.). */ /** - * amdgpu_wb_fini - Disable Writeback and free memory + * amdgpu_device_wb_fini - Disable Writeback and free memory * * @adev: amdgpu_device pointer * * Disables Writeback and frees the Writeback memory (all asics). * Used at driver shutdown. */ -static void amdgpu_wb_fini(struct amdgpu_device *adev) +static void amdgpu_device_wb_fini(struct amdgpu_device *adev) { if (adev->wb.wb_obj) { amdgpu_bo_free_kernel(&adev->wb.wb_obj, @@ -499,7 +465,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) } /** - * amdgpu_wb_init- Init Writeback driver info and allocate memory + * amdgpu_device_wb_init- Init Writeback driver info and allocate memory * * @adev: amdgpu_device pointer * @@ -507,7 +473,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) * Used at driver startup. * Returns 0 on success or an -error on failure. 
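The deleted amdgpu_doorbell_get_kfd_info() is just arithmetic over the doorbell BAR split: amdgpu keeps the first num_doorbells 32-bit slots for its own rings, and amdkfd is told to use everything after that prefix, or gets zeros when nothing is left. A standalone rendering of the same split (all numbers illustrative):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t base = 0xd0000000;	/* illustrative BAR 2 address */
	uint64_t size = 8ull << 20;	/* illustrative BAR 2 length */
	uint64_t reserved = 0x400 * sizeof(uint32_t);	/* amdgpu's slots */

	if (size > reserved)
		/* amdkfd gets the whole aperture plus the offset of the
		 * first doorbell it may touch */
		printf("kfd: base 0x%" PRIx64 " size %" PRIu64
		       " start_offset %" PRIu64 "\n", base, size, reserved);
	else
		/* nothing left over: report an empty aperture */
		puts("kfd: base 0 size 0 start_offset 0");
	return 0;
}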
*/ -static int amdgpu_wb_init(struct amdgpu_device *adev) +static int amdgpu_device_wb_init(struct amdgpu_device *adev) { int r; @@ -533,7 +499,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) } /** - * amdgpu_wb_get - Allocate a wb entry + * amdgpu_device_wb_get - Allocate a wb entry * * @adev: amdgpu_device pointer * @wb: wb index @@ -541,7 +507,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) * Allocate a wb slot for use by the driver (all asics). * Returns 0 on success or -EINVAL on failure. */ -int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) +int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) { unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); @@ -555,61 +521,34 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) } /** - * amdgpu_wb_free - Free a wb entry + * amdgpu_device_wb_free - Free a wb entry * * @adev: amdgpu_device pointer * @wb: wb index * * Free a wb slot allocated for use by the driver (all asics) */ -void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) +void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { if (wb < adev->wb.num_wb) __clear_bit(wb >> 3, adev->wb.used); } /** - * amdgpu_vram_location - try to find VRAM location + * amdgpu_device_vram_location - try to find VRAM location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * @base: base address at which to put VRAM * * Function will try to place VRAM at base address provided - * as parameter (which is so far either PCI aperture address or - * for IGP TOM base address). - * - * If there is not enough space to fit the unvisible VRAM in the 32bits - * address space then we limit the VRAM size to the aperture. - * - * Note: We don't explicitly enforce VRAM start to be aligned on VRAM size, - * this shouldn't be a problem as we are using the PCI aperture as a reference. - * Otherwise this would be needed for rv280, all r3xx, and all r4xx, but - * not IGP. - * - * Note: we use mc_vram_size as on some board we need to program the mc to - * cover the whole aperture even if VRAM size is inferior to aperture size - * Novell bug 204882 + along with lots of ubuntu ones - * - * Note: when limiting vram it's safe to overwritte real_vram_size because - * we are not in case where real_vram_size is inferior to mc_vram_size (ie - * note afected by bogus hw of Novell bug 204882 + along with lots of ubuntu - * ones) - * - * Note: IGP TOM addr should be the same as the aperture addr, we don't - * explicitly check for that though. - * - * FIXME: when reducing VRAM size align new size on power of 2. + * as parameter. 
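The writeback helpers renamed here manage a simple bitmap of 8-dword slots: amdgpu_device_wb_get() finds the first clear bit and hands back the slot number shifted left by 3 (a dword offset into the writeback page), and amdgpu_device_wb_free() shifts right by 3 to clear the same bit, which is exactly the >>3 visible in the hunk. A toy version with a 64-slot bitmap:

#include <stdint.h>
#include <stdio.h>

#define NUM_WB 64

static uint64_t used;	/* stand-in for the adev->wb.used bitmap */

static int wb_get(uint32_t *wb)
{
	unsigned i;

	for (i = 0; i < NUM_WB; i++) {
		if (!(used & (1ull << i))) {
			used |= 1ull << i;
			*wb = i << 3;	/* dword offset of the slot */
			return 0;
		}
	}
	return -1;	/* -EINVAL in the kernel */
}

static void wb_free(uint32_t wb)
{
	used &= ~(1ull << (wb >> 3));	/* dword offset back to slot */
}

int main(void)
{
	uint32_t a, b;

	wb_get(&a);
	wb_get(&b);
	printf("%u %u\n", (unsigned)a, (unsigned)b);	/* 0 8 */
	wb_free(a);
	return 0;
}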
*/ -void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base) +void amdgpu_device_vram_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc, u64 base) { uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; mc->vram_start = base; - if (mc->mc_vram_size > (adev->mc.mc_mask - base + 1)) { - dev_warn(adev->dev, "limiting VRAM to PCI aperture size\n"); - mc->real_vram_size = mc->aper_size; - mc->mc_vram_size = mc->aper_size; - } mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; if (limit && limit < mc->real_vram_size) mc->real_vram_size = limit; @@ -619,7 +558,7 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 } /** - * amdgpu_gart_location - try to find GTT location + * amdgpu_device_gart_location - try to find GTT location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * @@ -630,7 +569,8 @@ void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 * * FIXME: when reducing GTT size align new size on power of 2. */ -void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) +void amdgpu_device_gart_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc) { u64 size_af, size_bf; @@ -647,93 +587,91 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) dev_warn(adev->dev, "limiting GTT\n"); mc->gart_size = size_af; } - mc->gart_start = mc->vram_end + 1; + /* VCE doesn't like it when BOs cross a 4GB segment, so align + * the GART base on a 4GB boundary as well. + */ + mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL); } mc->gart_end = mc->gart_start + mc->gart_size - 1; dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n", mc->gart_size >> 20, mc->gart_start, mc->gart_end); } -/* - * Firmware Reservation functions - */ /** - * amdgpu_fw_reserve_vram_fini - free fw reserved vram + * amdgpu_device_resize_fb_bar - try to resize FB BAR * * @adev: amdgpu_device pointer * - * free fw reserved vram if it has been reserved. + * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not + * to fail, but if any of the BARs is not accessible after the size we abort + * driver loading by returning -ENODEV. */ -void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev) +int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) { - amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, - NULL, &adev->fw_vram_usage.va); -} + u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size); + u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1; + struct pci_bus *root; + struct resource *res; + unsigned i; + u16 cmd; + int r; -/** - * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw - * - * @adev: amdgpu_device pointer - * - * create bo vram reservation from fw. 
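With the aperture-limiting fallback gone from amdgpu_device_vram_location(), the interesting bit of address-space layout is in gart_location: when the GART is placed after VRAM, its start is rounded up to a 4GB boundary so no buffer can straddle a 4GB segment (the VCE restriction quoted in the comment). A quick numeric model:

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t vram_base = 0;			/* illustrative */
	uint64_t vram_size = 6ull << 30;	/* 6 GB, illustrative */
	uint64_t gart_size = 256ull << 20;
	uint64_t vram_end = vram_base + vram_size - 1;

	/* GART after VRAM, gart_start on a 4GB boundary */
	uint64_t gart_start = ALIGN_UP(vram_end + 1, 0x100000000ull);
	uint64_t gart_end = gart_start + gart_size - 1;

	printf("VRAM 0x%016llx-0x%016llx\n",
	       (unsigned long long)vram_base, (unsigned long long)vram_end);
	printf("GTT  0x%016llx-0x%016llx\n",
	       (unsigned long long)gart_start, (unsigned long long)gart_end);
	return 0;
}

For the 6 GB example, vram_end is 0x17FFFFFFF and the GART starts at 0x200000000 rather than immediately after VRAM.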
- */ -int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev) -{ - int r = 0; - u64 gpu_addr; - u64 vram_size = adev->mc.visible_vram_size; + /* Bypass for VF */ + if (amdgpu_sriov_vf(adev)) + return 0; - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; + /* Check if the root BUS has 64bit memory resources */ + root = adev->pdev->bus; + while (root->parent) + root = root->parent; - if (adev->fw_vram_usage.size > 0 && - adev->fw_vram_usage.size <= vram_size) { + pci_bus_for_each_resource(root, res, i) { + if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && + res->start > 0x100000000ull) + break; + } - r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, - PAGE_SIZE, true, 0, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, - &adev->fw_vram_usage.reserved_bo); - if (r) - goto error_create; + /* Trying to resize is pointless without a root hub window above 4GB */ + if (!res) + return 0; - r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); - if (r) - goto error_reserve; - r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, - AMDGPU_GEM_DOMAIN_VRAM, - adev->fw_vram_usage.start_offset, - (adev->fw_vram_usage.start_offset + - adev->fw_vram_usage.size), &gpu_addr); - if (r) - goto error_pin; - r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, - &adev->fw_vram_usage.va); - if (r) - goto error_kmap; + /* Disable memory decoding while we change the BAR addresses and size */ + pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd); + pci_write_config_word(adev->pdev, PCI_COMMAND, + cmd & ~PCI_COMMAND_MEMORY); - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); - } - return r; + /* Free the VRAM and doorbell BAR, we most likely need to move both. */ + amdgpu_device_doorbell_fini(adev); + if (adev->asic_type >= CHIP_BONAIRE) + pci_release_resource(adev->pdev, 2); -error_kmap: - amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); -error_pin: - amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); -error_reserve: - amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); -error_create: - adev->fw_vram_usage.va = NULL; - adev->fw_vram_usage.reserved_bo = NULL; - return r; -} + pci_release_resource(adev->pdev, 0); + + r = pci_resize_resource(adev->pdev, 0, rbar_size); + if (r == -ENOSPC) + DRM_INFO("Not enough PCI address space for a large BAR."); + else if (r && r != -ENOTSUPP) + DRM_ERROR("Problem resizing BAR0 (%d).", r); + + pci_assign_unassigned_bus_resources(adev->pdev->bus); + + /* When the doorbell or fb BAR isn't available we have no chance of + * using the device. + */ + r = amdgpu_device_doorbell_init(adev); + if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) + return -ENODEV; + + pci_write_config_word(adev->pdev, PCI_COMMAND, cmd); + return 0; +} /* * GPU helpers function. */ /** - * amdgpu_need_post - check if the hw need post or not + * amdgpu_device_need_post - check if the hw need post or not * * @adev: amdgpu_device pointer * @@ -741,7 +679,7 @@ error_create: * or post is needed if hw reset is performed. * Returns true if need or false if not. */ -bool amdgpu_need_post(struct amdgpu_device *adev) +bool amdgpu_device_need_post(struct amdgpu_device *adev) { uint32_t reg; @@ -786,285 +724,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev) return true; } -/** - * amdgpu_dummy_page_init - init dummy page used by the driver - * - * @adev: amdgpu_device pointer - * - * Allocate the dummy page used by the driver (all asics). 
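The new amdgpu_device_resize_fb_bar() derives its resizable-BAR request from VRAM size with two log2 steps: round the byte size up to a power of two, convert to MB, and take order_base_2 minus one as the size encoding passed to pci_resize_resource(). The |1 both guards the sub-megabyte case and bumps exact powers of two, so the trailing -1 lands back on log2 of the megabyte count. The same arithmetic in standalone form (the helpers mirror the kernel's roundup_pow_of_two/order_base_2 semantics):

#include <stdint.h>
#include <stdio.h>

static uint64_t roundup_pow_of_two64(uint64_t v)
{
	uint64_t r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

static unsigned ilog2_64(uint64_t v)	/* v is a power of two */
{
	unsigned n = 0;

	while (v >>= 1)
		n++;
	return n;
}

int main(void)
{
	uint64_t vram_sizes[] = { 4ull << 30, 8ull << 30, 16ull << 30 };
	unsigned i;

	for (i = 0; i < sizeof(vram_sizes) / sizeof(vram_sizes[0]); i++) {
		uint64_t space_needed = roundup_pow_of_two64(vram_sizes[i]);
		/* order_base_2(((space_needed >> 20) | 1)) - 1 */
		uint32_t rbar_size = ilog2_64(roundup_pow_of_two64(
				(space_needed >> 20) | 1)) - 1;

		printf("%llu MB VRAM -> rbar_size %u\n",
		       (unsigned long long)(vram_sizes[i] >> 20), rbar_size);
	}
	return 0;
}

This prints 12, 13 and 14 for 4, 8 and 16 GB, i.e. exactly log2 of the size in MB, which is the shape of encoding the PCI resize path expects.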
- * This dummy page is used by the driver as a filler for gart entries - * when pages are taken out of the GART - * Returns 0 on sucess, -ENOMEM on failure. - */ -int amdgpu_dummy_page_init(struct amdgpu_device *adev) -{ - if (adev->dummy_page.page) - return 0; - adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); - if (adev->dummy_page.page == NULL) - return -ENOMEM; - adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page, - 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) { - dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; - return -ENOMEM; - } - return 0; -} - -/** - * amdgpu_dummy_page_fini - free dummy page used by the driver - * - * @adev: amdgpu_device pointer - * - * Frees the dummy page used by the driver (all asics). - */ -void amdgpu_dummy_page_fini(struct amdgpu_device *adev) -{ - if (adev->dummy_page.page == NULL) - return; - pci_unmap_page(adev->pdev, adev->dummy_page.addr, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; -} - - -/* ATOM accessor methods */ -/* - * ATOM is an interpreted byte code stored in tables in the vbios. The - * driver registers callbacks to access registers and the interpreter - * in the driver parses the tables and executes then to program specific - * actions (set display modes, asic init, etc.). See amdgpu_atombios.c, - * atombios.h, and atom.c - */ - -/** - * cail_pll_read - read PLL register - * - * @info: atom card_info pointer - * @reg: PLL register offset - * - * Provides a PLL register accessor for the atom interpreter (r4xx+). - * Returns the value of the PLL register. - */ -static uint32_t cail_pll_read(struct card_info *info, uint32_t reg) -{ - return 0; -} - -/** - * cail_pll_write - write PLL register - * - * @info: atom card_info pointer - * @reg: PLL register offset - * @val: value to write to the pll register - * - * Provides a PLL register accessor for the atom interpreter (r4xx+). - */ -static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - -} - -/** - * cail_mc_read - read MC (Memory Controller) register - * - * @info: atom card_info pointer - * @reg: MC register offset - * - * Provides an MC register accessor for the atom interpreter (r4xx+). - * Returns the value of the MC register. - */ -static uint32_t cail_mc_read(struct card_info *info, uint32_t reg) -{ - return 0; -} - -/** - * cail_mc_write - write MC (Memory Controller) register - * - * @info: atom card_info pointer - * @reg: MC register offset - * @val: value to write to the pll register - * - * Provides a MC register accessor for the atom interpreter (r4xx+). - */ -static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - -} - -/** - * cail_reg_write - write MMIO register - * - * @info: atom card_info pointer - * @reg: MMIO register offset - * @val: value to write to the pll register - * - * Provides a MMIO register accessor for the atom interpreter (r4xx+). - */ -static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - struct amdgpu_device *adev = info->dev->dev_private; - - WREG32(reg, val); -} - -/** - * cail_reg_read - read MMIO register - * - * @info: atom card_info pointer - * @reg: MMIO register offset - * - * Provides an MMIO register accessor for the atom interpreter (r4xx+). - * Returns the value of the MMIO register. 
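The CAIL accessors being removed here (they move into amdgpu_atombios.c along with the init/fini pair visible at the top of this series) exist because the ATOM interpreter is register-agnostic: it executes vbios bytecode through whatever accessor table the driver registers. The contract reduces to a struct of function pointers, roughly as below (illustrative types, not the real struct card_info):

#include <stdint.h>
#include <stdio.h>

/* the shape of the driver/interpreter contract: a table of register
 * accessors the interpreter calls while executing vbios bytecode */
struct card_ops {
	uint32_t (*reg_read)(uint32_t reg);
	void (*reg_write)(uint32_t reg, uint32_t val);
};

static uint32_t regs[16];

static uint32_t mmio_read(uint32_t reg)
{
	return regs[reg & 15];
}

static void mmio_write(uint32_t reg, uint32_t val)
{
	regs[reg & 15] = val;
}

/* stand-in for one interpreted opcode: read-modify-write a register */
static void run_op(const struct card_ops *ops, uint32_t reg,
		   uint32_t or_mask)
{
	ops->reg_write(reg, ops->reg_read(reg) | or_mask);
}

int main(void)
{
	struct card_ops ops = { mmio_read, mmio_write };

	run_op(&ops, 3, 0x80);
	printf("0x%x\n", regs[3]);
	return 0;
}

This indirection is also why the PLL and MC callbacks can be empty stubs on modern ASICs: the table slots must exist for the interpreter, even when the bus no longer has those register spaces.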
- */ -static uint32_t cail_reg_read(struct card_info *info, uint32_t reg) -{ - struct amdgpu_device *adev = info->dev->dev_private; - uint32_t r; - - r = RREG32(reg); - return r; -} - -/** - * cail_ioreg_write - write IO register - * - * @info: atom card_info pointer - * @reg: IO register offset - * @val: value to write to the pll register - * - * Provides a IO register accessor for the atom interpreter (r4xx+). - */ -static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val) -{ - struct amdgpu_device *adev = info->dev->dev_private; - - WREG32_IO(reg, val); -} - -/** - * cail_ioreg_read - read IO register - * - * @info: atom card_info pointer - * @reg: IO register offset - * - * Provides an IO register accessor for the atom interpreter (r4xx+). - * Returns the value of the IO register. - */ -static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) -{ - struct amdgpu_device *adev = info->dev->dev_private; - uint32_t r; - - r = RREG32_IO(reg); - return r; -} - -static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - struct atom_context *ctx = adev->mode_info.atom_context; - - return snprintf(buf, PAGE_SIZE, "%s\n", ctx->vbios_version); -} - -static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, - NULL); - -/** - * amdgpu_atombios_fini - free the driver info and callbacks for atombios - * - * @adev: amdgpu_device pointer - * - * Frees the driver info and register access callbacks for the ATOM - * interpreter (r4xx+). - * Called at driver shutdown. - */ -static void amdgpu_atombios_fini(struct amdgpu_device *adev) -{ - if (adev->mode_info.atom_context) { - kfree(adev->mode_info.atom_context->scratch); - kfree(adev->mode_info.atom_context->iio); - } - kfree(adev->mode_info.atom_context); - adev->mode_info.atom_context = NULL; - kfree(adev->mode_info.atom_card_info); - adev->mode_info.atom_card_info = NULL; - device_remove_file(adev->dev, &dev_attr_vbios_version); -} - -/** - * amdgpu_atombios_init - init the driver info and callbacks for atombios - * - * @adev: amdgpu_device pointer - * - * Initializes the driver info and register access callbacks for the - * ATOM interpreter (r4xx+). - * Returns 0 on sucess, -ENOMEM on failure. - * Called at driver startup. - */ -static int amdgpu_atombios_init(struct amdgpu_device *adev) -{ - struct card_info *atom_card_info = - kzalloc(sizeof(struct card_info), GFP_KERNEL); - int ret; - - if (!atom_card_info) - return -ENOMEM; - - adev->mode_info.atom_card_info = atom_card_info; - atom_card_info->dev = adev->ddev; - atom_card_info->reg_read = cail_reg_read; - atom_card_info->reg_write = cail_reg_write; - /* needed for iio ops */ - if (adev->rio_mem) { - atom_card_info->ioreg_read = cail_ioreg_read; - atom_card_info->ioreg_write = cail_ioreg_write; - } else { - DRM_INFO("PCI I/O BAR is not found. 
Using MMIO to access ATOM BIOS\n"); - atom_card_info->ioreg_read = cail_reg_read; - atom_card_info->ioreg_write = cail_reg_write; - } - atom_card_info->mc_read = cail_mc_read; - atom_card_info->mc_write = cail_mc_write; - atom_card_info->pll_read = cail_pll_read; - atom_card_info->pll_write = cail_pll_write; - - adev->mode_info.atom_context = amdgpu_atom_parse(atom_card_info, adev->bios); - if (!adev->mode_info.atom_context) { - amdgpu_atombios_fini(adev); - return -ENOMEM; - } - - mutex_init(&adev->mode_info.atom_context->mutex); - if (adev->is_atom_fw) { - amdgpu_atomfirmware_scratch_regs_init(adev); - amdgpu_atomfirmware_allocate_fb_scratch(adev); - } else { - amdgpu_atombios_scratch_regs_init(adev); - amdgpu_atombios_allocate_fb_scratch(adev); - } - - ret = device_create_file(adev->dev, &dev_attr_vbios_version); - if (ret) { - DRM_ERROR("Failed to create device file for VBIOS version\n"); - return ret; - } - - return 0; -} - /* if we get transitioned to only one device, take VGA back */ /** - * amdgpu_vga_set_decode - enable/disable vga decode + * amdgpu_device_vga_set_decode - enable/disable vga decode * * @cookie: amdgpu_device pointer * @state: enable/disable vga decode @@ -1072,7 +734,7 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev) * Enable/disable vga decode (all asics). * Returns VGA resource flags. */ -static unsigned int amdgpu_vga_set_decode(void *cookie, bool state) +static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state) { struct amdgpu_device *adev = cookie; amdgpu_asic_set_vga_state(adev, state); @@ -1083,7 +745,7 @@ static unsigned int amdgpu_vga_set_decode(void *cookie, bool state) return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; } -static void amdgpu_check_block_size(struct amdgpu_device *adev) +static void amdgpu_device_check_block_size(struct amdgpu_device *adev) { /* defines number of bits in page table versus page directory, * a page is 4KB so we have 12 bits offset, minimum 9 bits in the @@ -1094,64 +756,32 @@ static void amdgpu_check_block_size(struct amdgpu_device *adev) if (amdgpu_vm_block_size < 9) { dev_warn(adev->dev, "VM page table size (%d) too small\n", amdgpu_vm_block_size); - goto def_value; + amdgpu_vm_block_size = -1; } - - if (amdgpu_vm_block_size > 24 || - (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) { - dev_warn(adev->dev, "VM page table size (%d) too large\n", - amdgpu_vm_block_size); - goto def_value; - } - - return; - -def_value: - amdgpu_vm_block_size = -1; } -static void amdgpu_check_vm_size(struct amdgpu_device *adev) +static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) { /* no need to check the default value */ if (amdgpu_vm_size == -1) return; - if (!is_power_of_2(amdgpu_vm_size)) { - dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", - amdgpu_vm_size); - goto def_value; - } - if (amdgpu_vm_size < 1) { dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", amdgpu_vm_size); - goto def_value; - } - - /* - * Max GPUVM size for Cayman, SI, CI VI are 40 bits. - */ - if (amdgpu_vm_size > 1024) { - dev_warn(adev->dev, "VM size (%d) too large, max is 1TB\n", - amdgpu_vm_size); - goto def_value; + amdgpu_vm_size = -1; } - - return; - -def_value: - amdgpu_vm_size = -1; } /** - * amdgpu_check_arguments - validate module params + * amdgpu_device_check_arguments - validate module params * * @adev: amdgpu_device pointer * * Validates certain module parameters and updates * the associated values used by the driver (all asics). 
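 * Out-of-range values are warned about and then clamped or reset to their defaults rather than failing device init.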
*/ -static void amdgpu_check_arguments(struct amdgpu_device *adev) +static void amdgpu_device_check_arguments(struct amdgpu_device *adev) { if (amdgpu_sched_jobs < 4) { dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", @@ -1184,9 +814,9 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) amdgpu_vm_fragment_size = -1; } - amdgpu_check_vm_size(adev); + amdgpu_device_check_vm_size(adev); - amdgpu_check_block_size(adev); + amdgpu_device_check_block_size(adev); if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 || !is_power_of_2(amdgpu_vram_page_split))) { @@ -1194,6 +824,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) amdgpu_vram_page_split); amdgpu_vram_page_split = 1024; } + + if (amdgpu_lockup_timeout == 0) { + dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n"); + amdgpu_lockup_timeout = 10000; + } } /** @@ -1257,9 +892,9 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { .can_switch = amdgpu_switcheroo_can_switch, }; -int amdgpu_set_clockgating_state(struct amdgpu_device *adev, - enum amd_ip_block_type block_type, - enum amd_clockgating_state state) +int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_clockgating_state state) { int i, r = 0; @@ -1279,9 +914,9 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev, return r; } -int amdgpu_set_powergating_state(struct amdgpu_device *adev, - enum amd_ip_block_type block_type, - enum amd_powergating_state state) +int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev, + enum amd_ip_block_type block_type, + enum amd_powergating_state state) { int i, r = 0; @@ -1301,7 +936,8 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev, return r; } -void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) +void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) { int i; @@ -1313,8 +949,8 @@ void amdgpu_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) } } -int amdgpu_wait_for_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) +int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) { int i, r; @@ -1332,8 +968,8 @@ int amdgpu_wait_for_idle(struct amdgpu_device *adev, } -bool amdgpu_is_idle(struct amdgpu_device *adev, - enum amd_ip_block_type block_type) +bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, + enum amd_ip_block_type block_type) { int i; @@ -1347,8 +983,9 @@ bool amdgpu_is_idle(struct amdgpu_device *adev, } -struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev, - enum amd_ip_block_type type) +struct amdgpu_ip_block * +amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, + enum amd_ip_block_type type) { int i; @@ -1360,7 +997,7 @@ struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev, } /** - * amdgpu_ip_block_version_cmp + * amdgpu_device_ip_block_version_cmp * * @adev: amdgpu_device pointer * @type: enum amd_ip_block_type @@ -1370,11 +1007,11 @@ struct amdgpu_ip_block * amdgpu_get_ip_block(struct amdgpu_device *adev, * return 0 if equal or greater * return 1 if smaller or the ip_block doesn't exist */ -int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev, - enum amd_ip_block_type type, - u32 major, u32 minor) +int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, + enum amd_ip_block_type type, + u32 major, u32 minor) { - struct amdgpu_ip_block *ip_block =
amdgpu_get_ip_block(adev, type); + struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type); if (ip_block && ((ip_block->version->major > major) || ((ip_block->version->major == major) && @@ -1385,7 +1022,7 @@ int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev, } /** - * amdgpu_ip_block_add + * amdgpu_device_ip_block_add * * @adev: amdgpu_device pointer * @ip_block_version: pointer to the IP to add @@ -1393,8 +1030,8 @@ int amdgpu_ip_block_version_cmp(struct amdgpu_device *adev, * Adds the IP block driver information to the collection of IPs * on the asic. */ -int amdgpu_ip_block_add(struct amdgpu_device *adev, - const struct amdgpu_ip_block_version *ip_block_version) +int amdgpu_device_ip_block_add(struct amdgpu_device *adev, + const struct amdgpu_ip_block_version *ip_block_version) { if (!ip_block_version) return -EINVAL; @@ -1550,7 +1187,7 @@ out: return err; } -static int amdgpu_early_init(struct amdgpu_device *adev) +static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { int i, r; @@ -1622,10 +1259,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev) if (r) return r; + amdgpu_amdkfd_device_probe(adev); + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) - return r; + return -EAGAIN; } for (i = 0; i < adev->num_ip_blocks; i++) { @@ -1657,7 +1296,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return 0; } -static int amdgpu_init(struct amdgpu_device *adev) +static int amdgpu_device_ip_init(struct amdgpu_device *adev) { int i, r; @@ -1673,7 +1312,7 @@ static int amdgpu_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.sw = true; /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - r = amdgpu_vram_scratch_init(adev); + r = amdgpu_device_vram_scratch_init(adev); if (r) { DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); return r; @@ -1683,9 +1322,9 @@ static int amdgpu_init(struct amdgpu_device *adev) DRM_ERROR("hw_init %d failed %d\n", i, r); return r; } - r = amdgpu_wb_init(adev); + r = amdgpu_device_wb_init(adev); if (r) { - DRM_ERROR("amdgpu_wb_init failed %d\n", r); + DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); return r; } adev->ip_blocks[i].status.hw = true; @@ -1716,21 +1355,26 @@ static int amdgpu_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = true; } + amdgpu_amdkfd_device_init(adev); + + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_release_full_gpu(adev, true); + return 0; } -static void amdgpu_fill_reset_magic(struct amdgpu_device *adev) +static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) { memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); } -static bool amdgpu_check_vram_lost(struct amdgpu_device *adev) +static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) { return !!memcmp(adev->gart.ptr, adev->reset_magic, AMDGPU_RESET_MAGIC_NUM); } -static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) +static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) { int i = 0, r; @@ -1753,7 +1397,7 @@ static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) return 0; } -static int amdgpu_late_init(struct amdgpu_device *adev) +static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { int i = 0, r; @@ -1774,15 +1418,16 @@ static int amdgpu_late_init(struct amdgpu_device *adev) mod_delayed_work(system_wq, &adev->late_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); - amdgpu_fill_reset_magic(adev); + 
amdgpu_device_fill_reset_magic(adev); return 0; } -static int amdgpu_fini(struct amdgpu_device *adev) +static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + amdgpu_amdkfd_device_fini(adev); /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) @@ -1811,8 +1456,9 @@ static int amdgpu_fini(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - amdgpu_wb_fini(adev); - amdgpu_vram_scratch_fini(adev); + amdgpu_free_static_csa(adev); + amdgpu_device_wb_fini(adev); + amdgpu_device_vram_scratch_fini(adev); } if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && @@ -1859,19 +1505,20 @@ static int amdgpu_fini(struct amdgpu_device *adev) } if (amdgpu_sriov_vf(adev)) - amdgpu_virt_release_full_gpu(adev, false); + if (amdgpu_virt_release_full_gpu(adev, false)) + DRM_ERROR("failed to release exclusive mode on fini\n"); return 0; } -static void amdgpu_late_init_func_handler(struct work_struct *work) +static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, late_init_work.work); - amdgpu_late_set_cg_state(adev); + amdgpu_device_ip_late_set_cg_state(adev); } -int amdgpu_suspend(struct amdgpu_device *adev) +int amdgpu_device_ip_suspend(struct amdgpu_device *adev) { int i, r; @@ -1879,10 +1526,10 @@ int amdgpu_suspend(struct amdgpu_device *adev) amdgpu_virt_request_full_gpu(adev, false); /* ungate SMC block first */ - r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, - AMD_CG_STATE_UNGATE); + r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, + AMD_CG_STATE_UNGATE); if (r) { - DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n",r); + DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r); } for (i = adev->num_ip_blocks - 1; i >= 0; i--) { @@ -1912,7 +1559,7 @@ int amdgpu_suspend(struct amdgpu_device *adev) return 0; } -static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) +static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) { int i, r; @@ -1941,7 +1588,7 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) return 0; } -static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) +static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) { int i, r; @@ -1974,7 +1621,7 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) return 0; } -static int amdgpu_resume_phase1(struct amdgpu_device *adev) +static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) { int i, r; @@ -1997,7 +1644,7 @@ static int amdgpu_resume_phase1(struct amdgpu_device *adev) return 0; } -static int amdgpu_resume_phase2(struct amdgpu_device *adev) +static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) { int i, r; @@ -2019,14 +1666,14 @@ static int amdgpu_resume_phase2(struct amdgpu_device *adev) return 0; } -static int amdgpu_resume(struct amdgpu_device *adev) +static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; - r = amdgpu_resume_phase1(adev); + r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; - r = amdgpu_resume_phase2(adev); + r = amdgpu_device_ip_resume_phase2(adev); return r; } @@ -2163,8 +1810,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); + mutex_init(&adev->lock_reset); - 
amdgpu_check_arguments(adev); + amdgpu_device_check_arguments(adev); spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); @@ -2179,13 +1827,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); - INIT_LIST_HEAD(&adev->gtt_list); - spin_lock_init(&adev->gtt_list_lock); - INIT_LIST_HEAD(&adev->ring_lru_list); spin_lock_init(&adev->ring_lru_list_lock); - INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); + INIT_DELAYED_WORK(&adev->late_init_work, + amdgpu_device_ip_late_init_func_handler); /* Registers mapping */ /* TODO: block userspace mapping of io register */ @@ -2205,7 +1851,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); /* doorbell bar mapping */ - amdgpu_doorbell_init(adev); + amdgpu_device_doorbell_init(adev); /* io port mapping */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { @@ -2219,17 +1865,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("PCI I/O BAR is not found.\n"); /* early init functions */ - r = amdgpu_early_init(adev); + r = amdgpu_device_ip_early_init(adev); if (r) return r; /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ - vga_client_register(adev->pdev, adev, NULL, amdgpu_vga_set_decode); + vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); - if (amdgpu_runtime_pm == 1) - runtime = true; if (amdgpu_device_is_px(ddev)) runtime = true; if (!pci_is_thunderbolt_attached(adev->pdev)) @@ -2255,7 +1899,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_device_detect_sriov_bios(adev); /* Post card if necessary */ - if (amdgpu_need_post(adev)) { + if (amdgpu_device_need_post(adev)) { if (!adev->bios) { dev_err(adev->dev, "no vBIOS found\n"); r = -EINVAL; @@ -2267,8 +1911,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, dev_err(adev->dev, "gpu post error!\n"); goto failed; } - } else { - DRM_INFO("GPU post is not needed\n"); } if (adev->is_atom_fw) { @@ -2303,11 +1945,23 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* init the mode config */ drm_mode_config_init(adev->ddev); - r = amdgpu_init(adev); + r = amdgpu_device_ip_init(adev); if (r) { - dev_err(adev->dev, "amdgpu_init failed\n"); + /* failed in exclusive mode due to timeout */ + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + amdgpu_virt_mmio_blocked(adev) && + !amdgpu_virt_wait_reset(adev)) { + dev_err(adev->dev, "VF exclusive mode timeout\n"); + /* Don't send request since VF is inactive. 
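+ * MMIO is blocked and the VF never left exclusive mode, so drop virt.ops to stop further mailbox traffic and return -EAGAIN so the probe path can retry.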
*/ + adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; + adev->virt.ops = NULL; + r = -EAGAIN; + goto failed; + } + dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); - amdgpu_fini(adev); + amdgpu_device_ip_fini(adev); goto failed; } @@ -2343,7 +1997,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("registering pm debugfs failed (%d).\n", r); - r = amdgpu_gem_debugfs_init(adev); + r = amdgpu_debugfs_gem_init(adev); if (r) DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -2351,17 +2005,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("registering register debugfs failed (%d).\n", r); - r = amdgpu_debugfs_test_ib_ring_init(adev); - if (r) - DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r); - r = amdgpu_debugfs_firmware_init(adev); if (r) DRM_ERROR("registering firmware debugfs failed (%d).\n", r); - r = amdgpu_debugfs_vbios_dump_init(adev); + r = amdgpu_debugfs_init(adev); if (r) - DRM_ERROR("Creating vbios dump debugfs failed (%d).\n", r); + DRM_ERROR("Creating debugfs files failed (%d).\n", r); if ((amdgpu_testing & 1)) { if (adev->accel_working) @@ -2379,9 +2029,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ - r = amdgpu_late_init(adev); + r = amdgpu_device_ip_late_init(adev); if (r) { - dev_err(adev->dev, "amdgpu_late_init failed\n"); + dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); goto failed; } @@ -2392,6 +2042,7 @@ failed: amdgpu_vf_error_trans_all(adev); if (runtime) vga_switcheroo_fini_domain_pm_ops(adev->dev); + return r; } @@ -2411,13 +2062,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->shutdown = true; if (adev->mode_info.mode_config_initialized) drm_crtc_force_disable_all(adev->ddev); - /* evict vram memory */ - amdgpu_bo_evict_vram(adev); + amdgpu_ib_pool_fini(adev); - amdgpu_fw_reserve_vram_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); - r = amdgpu_fini(adev); + r = amdgpu_device_ip_fini(adev); if (adev->firmware.gpu_info_fw) { release_firmware(adev->firmware.gpu_info_fw); adev->firmware.gpu_info_fw = NULL; @@ -2440,7 +2089,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->rio_mem = NULL; iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_doorbell_fini(adev); + amdgpu_device_doorbell_fini(adev); amdgpu_pm_sysfs_fini(adev); amdgpu_debugfs_regs_cleanup(adev); } @@ -2521,7 +2170,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) amdgpu_fence_driver_suspend(adev); - r = amdgpu_suspend(adev); + r = amdgpu_device_ip_suspend(adev); /* evict remaining vram memory * This second call to evict vram is to evict the gart page table @@ -2529,7 +2178,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - amdgpu_atombios_scratch_regs_save(adev); pci_save_state(dev->pdev); if (suspend) { /* Shut down the device */ @@ -2578,18 +2226,17 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (r) goto unlock; } - amdgpu_atombios_scratch_regs_restore(adev); /* post card */ - if (amdgpu_need_post(adev)) { + if (amdgpu_device_need_post(adev)) { r = amdgpu_atom_asic_init(adev->mode_info.atom_context); if (r) DRM_ERROR("amdgpu asic init failed\n"); } - r = 
amdgpu_resume(adev); + r = amdgpu_device_ip_resume(adev); if (r) { - DRM_ERROR("amdgpu_resume failed (%d).\n", r); + DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); goto unlock; } amdgpu_fence_driver_resume(adev); @@ -2600,7 +2247,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) DRM_ERROR("ib ring test failed (%d).\n", r); } - r = amdgpu_late_init(adev); + r = amdgpu_device_ip_late_init(adev); if (r) goto unlock; @@ -2680,7 +2327,7 @@ unlock: return r; } -static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) +static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) { int i; bool asic_hang = false; @@ -2702,7 +2349,7 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) return asic_hang; } -static int amdgpu_pre_soft_reset(struct amdgpu_device *adev) +static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) { int i, r = 0; @@ -2720,7 +2367,7 @@ static int amdgpu_pre_soft_reset(struct amdgpu_device *adev) return 0; } -static bool amdgpu_need_full_reset(struct amdgpu_device *adev) +static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) { int i; @@ -2741,7 +2388,7 @@ static bool amdgpu_need_full_reset(struct amdgpu_device *adev) return false; } -static int amdgpu_soft_reset(struct amdgpu_device *adev) +static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) { int i, r = 0; @@ -2759,7 +2406,7 @@ static int amdgpu_soft_reset(struct amdgpu_device *adev) return 0; } -static int amdgpu_post_soft_reset(struct amdgpu_device *adev) +static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) { int i, r = 0; @@ -2776,18 +2423,10 @@ static int amdgpu_post_soft_reset(struct amdgpu_device *adev) return 0; } -bool amdgpu_need_backup(struct amdgpu_device *adev) -{ - if (adev->flags & AMD_IS_APU) - return false; - - return amdgpu_lockup_timeout > 0 ? true : false; -} - -static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct dma_fence **fence) +static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_bo *bo, + struct dma_fence **fence) { uint32_t domain; int r; @@ -2819,163 +2458,181 @@ err: return r; } -/** - * amdgpu_sriov_gpu_reset - reset the asic +/* + * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough * * @adev: amdgpu device pointer - * @job: which job trigger hang + * @reset_flags: output param tells caller the reset result * - * Attempt the reset the GPU if it has hung (all asics). - * for SRIOV case. - * Returns 0 for success or an error on failure. 
- */ -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) + * attempt to do a soft reset or a full reset and reinitialize the ASIC + * returns 0 on success, negative error code on failure + */ +static int amdgpu_device_reset(struct amdgpu_device *adev, + uint64_t *reset_flags) { - int i, j, r = 0; - int resched; - struct amdgpu_bo *bo, *tmp; - struct amdgpu_ring *ring; - struct dma_fence *fence = NULL, *next = NULL; + bool need_full_reset, vram_lost = 0; + int r; - mutex_lock(&adev->virt.lock_reset); - atomic_inc(&adev->gpu_reset_counter); - adev->in_sriov_reset = true; + need_full_reset = amdgpu_device_ip_need_full_reset(adev); - /* block TTM */ - resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + if (!need_full_reset) { + amdgpu_device_ip_pre_soft_reset(adev); + r = amdgpu_device_ip_soft_reset(adev); + amdgpu_device_ip_post_soft_reset(adev); + if (r || amdgpu_device_ip_check_soft_reset(adev)) { + DRM_INFO("soft reset failed, will fallback to full reset!\n"); + need_full_reset = true; + } - /* we start from the ring trigger GPU hang */ - j = job ? job->ring->idx : 0; + } - /* block scheduler */ - for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { - ring = adev->rings[i % AMDGPU_MAX_RINGS]; - if (!ring || !ring->sched.thread) - continue; + if (need_full_reset) { + r = amdgpu_device_ip_suspend(adev); - kthread_park(ring->sched.thread); +retry: + r = amdgpu_asic_reset(adev); + /* post card */ + amdgpu_atom_asic_init(adev->mode_info.atom_context); - if (job && j != i) - continue; + if (!r) { + dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_device_ip_resume_phase1(adev); + if (r) + goto out; - /* here give the last chance to check if job removed from mirror-list - * since we already pay some time on kthread_park */ - if (job && list_empty(&job->base.node)) { - kthread_unpark(ring->sched.thread); - goto give_up_reset; + vram_lost = amdgpu_device_check_vram_lost(adev); + if (vram_lost) { + DRM_ERROR("VRAM is lost!\n"); + atomic_inc(&adev->vram_lost_counter); + } + + r = amdgpu_gtt_mgr_recover( + &adev->mman.bdev.man[TTM_PL_TT]); + if (r) + goto out; + + r = amdgpu_device_ip_resume_phase2(adev); + if (r) + goto out; + + if (vram_lost) + amdgpu_device_fill_reset_magic(adev); } + } - if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit)) - amd_sched_job_kickout(&job->base); +out: + if (!r) { + amdgpu_irq_gpu_reset_resume_helper(adev); + r = amdgpu_ib_ring_tests(adev); + if (r) { + dev_err(adev->dev, "ib ring test failed (%d).\n", r); + r = amdgpu_device_ip_suspend(adev); + need_full_reset = true; + goto retry; + } + } - /* only do job_reset on the hang ring if @job not NULL */ - amd_sched_hw_job_reset(&ring->sched); + if (reset_flags) { + if (vram_lost) + (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ - amdgpu_fence_driver_force_completion_ring(ring); + if (need_full_reset) + (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; } - /* request to take full control of GPU before re-initialization */ - if (job) - amdgpu_virt_reset_gpu(adev); - else - amdgpu_virt_request_full_gpu(adev, true); + return r; +} +/* + * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf + * + * @adev: amdgpu device pointer + * @reset_flags: output param tells caller the reset result + * + * do a VF FLR and reinitialize the ASIC + * returns 0 on success, negative error code on failure + */ +static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, + uint64_t *reset_flags, + bool from_hypervisor) +{ + int r; + 
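+ /* A hypervisor-initiated FLR means full GPU access must be re-requested, while a guest-detected hang goes through the dedicated reset-GPU mailbox instead. */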
+ if (from_hypervisor) + r = amdgpu_virt_request_full_gpu(adev, true); + else + r = amdgpu_virt_reset_gpu(adev); + if (r) + return r; /* Resume IP prior to SMC */ - amdgpu_sriov_reinit_early(adev); + r = amdgpu_device_ip_reinit_early_sriov(adev); + if (r) + goto error; /* we need recover gart prior to run SMC/CP/SDMA resume */ - amdgpu_ttm_recover_gart(adev); + amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); /* now we are okay to resume SMC/CP/SDMA */ - amdgpu_sriov_reinit_late(adev); + r = amdgpu_device_ip_reinit_late_sriov(adev); + if (r) + goto error; amdgpu_irq_gpu_reset_resume_helper(adev); - - if (amdgpu_ib_ring_tests(adev)) + r = amdgpu_ib_ring_tests(adev); + if (r) dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); +error: /* release full control of GPU after ib test */ amdgpu_virt_release_full_gpu(adev, true); - DRM_INFO("recover vram bo from shadow\n"); - - ring = adev->mman.buffer_funcs_ring; - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { - next = NULL; - amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); - if (fence) { - r = dma_fence_wait(fence, false); - if (r) { - WARN(r, "recovery from shadow isn't completed\n"); - break; - } - } - - dma_fence_put(fence); - fence = next; - } - mutex_unlock(&adev->shadow_list_lock); - - if (fence) { - r = dma_fence_wait(fence, false); - if (r) - WARN(r, "recovery from shadow isn't completed\n"); - } - dma_fence_put(fence); - - for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { - ring = adev->rings[i % AMDGPU_MAX_RINGS]; - if (!ring || !ring->sched.thread) - continue; - - if (job && j != i) { - kthread_unpark(ring->sched.thread); - continue; + if (reset_flags) { + if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; + atomic_inc(&adev->vram_lost_counter); } - amd_sched_job_recovery(&ring->sched); - kthread_unpark(ring->sched.thread); + /* VF FLR or hotlink reset is always full-reset */ + (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; } - drm_helper_resume_force_mode(adev->ddev); -give_up_reset: - ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); - if (r) { - /* bad news, how to tell it to userspace ? */ - dev_info(adev->dev, "GPU reset failed\n"); - } else { - dev_info(adev->dev, "GPU reset successed!\n"); - } - - adev->in_sriov_reset = false; - mutex_unlock(&adev->virt.lock_reset); return r; } /** - * amdgpu_gpu_reset - reset the asic + * amdgpu_device_gpu_recover - reset the asic and recover scheduler * * @adev: amdgpu device pointer + * @job: which job triggered the hang + * @force: forces reset regardless of amdgpu_gpu_recovery * - * Attempt the reset the GPU if it has hung (all asics). + * Attempt to reset the GPU if it has hung (all asics). * Returns 0 for success or an error on failure. */ -int amdgpu_gpu_reset(struct amdgpu_device *adev) +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, bool force) { struct drm_atomic_state *state = NULL; - int i, r; - int resched; - bool need_full_reset, vram_lost = false; + uint64_t reset_flags = 0; + int i, r, resched; - if (!amdgpu_check_soft_reset(adev)) { + if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected.
Did some blocks stall?\n"); return 0; } + if (!force && (amdgpu_gpu_recovery == 0 || + (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) { + DRM_INFO("GPU recovery disabled.\n"); + return 0; + } + + dev_info(adev->dev, "GPU reset begin!\n"); + + mutex_lock(&adev->lock_reset); atomic_inc(&adev->gpu_reset_counter); + adev->in_gpu_reset = 1; /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); @@ -2989,69 +2646,26 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) if (!ring || !ring->sched.thread) continue; - kthread_park(ring->sched.thread); - amd_sched_hw_job_reset(&ring->sched); - } - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ - amdgpu_fence_driver_force_completion(adev); - need_full_reset = amdgpu_need_full_reset(adev); + /* only focus on the ring hit timeout if &job not NULL */ + if (job && job->ring->idx != i) + continue; - if (!need_full_reset) { - amdgpu_pre_soft_reset(adev); - r = amdgpu_soft_reset(adev); - amdgpu_post_soft_reset(adev); - if (r || amdgpu_check_soft_reset(adev)) { - DRM_INFO("soft reset failed, will fallback to full reset!\n"); - need_full_reset = true; - } - } + kthread_park(ring->sched.thread); + drm_sched_hw_job_reset(&ring->sched, &job->base); - if (need_full_reset) { - r = amdgpu_suspend(adev); + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion(ring); + } -retry: - amdgpu_atombios_scratch_regs_save(adev); - r = amdgpu_asic_reset(adev); - amdgpu_atombios_scratch_regs_restore(adev); - /* post card */ - amdgpu_atom_asic_init(adev->mode_info.atom_context); + if (amdgpu_sriov_vf(adev)) + r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true); + else + r = amdgpu_device_reset(adev, &reset_flags); - if (!r) { - dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_resume_phase1(adev); - if (r) - goto out; - vram_lost = amdgpu_check_vram_lost(adev); - if (vram_lost) { - DRM_ERROR("VRAM is lost!\n"); - atomic_inc(&adev->vram_lost_counter); - } - r = amdgpu_ttm_recover_gart(adev); - if (r) - goto out; - r = amdgpu_resume_phase2(adev); - if (r) - goto out; - if (vram_lost) - amdgpu_fill_reset_magic(adev); - } - } -out: if (!r) { - amdgpu_irq_gpu_reset_resume_helper(adev); - r = amdgpu_ib_ring_tests(adev); - if (r) { - dev_err(adev->dev, "ib ring test failed (%d).\n", r); - r = amdgpu_suspend(adev); - need_full_reset = true; - goto retry; - } - /** - * recovery vm page tables, since we cannot depend on VRAM is - * consistent after gpu full reset. 
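- * (they are rebuilt from the shadow BOs kept in GTT, which survive the reset)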
- */ - if (need_full_reset && amdgpu_need_backup(adev)) { + if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) || + (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_bo *bo, *tmp; struct dma_fence *fence = NULL, *next = NULL; @@ -3060,7 +2674,7 @@ out: mutex_lock(&adev->shadow_list_lock); list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { next = NULL; - amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); + amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); if (fence) { r = dma_fence_wait(fence, false); if (r) { @@ -3080,44 +2694,60 @@ out: } dma_fence_put(fence); } + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->sched.thread) continue; - amd_sched_job_recovery(&ring->sched); + /* only focus on the ring hit timeout if &job not NULL */ + if (job && job->ring->idx != i) + continue; + + drm_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); } } else { - dev_err(adev->dev, "asic resume failed (%d).\n", r); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - if (adev->rings[i] && adev->rings[i]->sched.thread) { - kthread_unpark(adev->rings[i]->sched.thread); - } + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + + /* only focus on the ring hit timeout if &job not NULL */ + if (job && job->ring->idx != i) + continue; + + kthread_unpark(adev->rings[i]->sched.thread); } } if (amdgpu_device_has_dc_support(adev)) { - r = drm_atomic_helper_resume(adev->ddev, state); + if (drm_atomic_helper_resume(adev->ddev, state)) + dev_info(adev->dev, "drm resume failed:%d\n", r); amdgpu_dm_display_resume(adev); - } else + } else { drm_helper_resume_force_mode(adev->ddev); + } ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); + if (r) { /* bad news, how to tell it to userspace ? 
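 * (we only log it here; userspace can observe the failure through the reset counters exposed via the context query ioctls)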
*/ - dev_info(adev->dev, "GPU reset failed\n"); - } - else { - dev_info(adev->dev, "GPU reset successed!\n"); + dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); + } else { + dev_info(adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); } amdgpu_vf_error_trans_all(adev); + adev->in_gpu_reset = 0; + mutex_unlock(&adev->lock_reset); return r; } -void amdgpu_get_pcie_info(struct amdgpu_device *adev) +void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { u32 mask; int ret; @@ -3209,773 +2839,3 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev) } } -/* - * Debugfs - */ -int amdgpu_debugfs_add_files(struct amdgpu_device *adev, - const struct drm_info_list *files, - unsigned nfiles) -{ - unsigned i; - - for (i = 0; i < adev->debugfs_count; i++) { - if (adev->debugfs[i].files == files) { - /* Already registered */ - return 0; - } - } - - i = adev->debugfs_count + 1; - if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) { - DRM_ERROR("Reached maximum number of debugfs components.\n"); - DRM_ERROR("Report so we increase " - "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n"); - return -EINVAL; - } - adev->debugfs[adev->debugfs_count].files = files; - adev->debugfs[adev->debugfs_count].num_files = nfiles; - adev->debugfs_count = i; -#if defined(CONFIG_DEBUG_FS) - drm_debugfs_create_files(files, nfiles, - adev->ddev->primary->debugfs_root, - adev->ddev->primary); -#endif - return 0; -} - -#if defined(CONFIG_DEBUG_FS) - -static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - bool pm_pg_lock, use_bank; - unsigned instance_bank, sh_bank, se_bank; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - /* are we reading registers for which a PG lock is necessary?
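- * (bit 23 of the 64-bit file offset carries that flag, and bit 62 selects explicit SE/SH/instance banking)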
*/ - pm_pg_lock = (*pos >> 23) & 1; - - if (*pos & (1ULL << 62)) { - se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; - sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; - instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; - - if (se_bank == 0x3FF) - se_bank = 0xFFFFFFFF; - if (sh_bank == 0x3FF) - sh_bank = 0xFFFFFFFF; - if (instance_bank == 0x3FF) - instance_bank = 0xFFFFFFFF; - use_bank = 1; - } else { - use_bank = 0; - } - - *pos &= (1UL << 22) - 1; - - if (use_bank) { - if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) - return -EINVAL; - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se_bank, - sh_bank, instance_bank); - } - - if (pm_pg_lock) - mutex_lock(&adev->pm.mutex); - - while (size) { - uint32_t value; - - if (*pos > adev->rmmio_size) - goto end; - - value = RREG32(*pos >> 2); - r = put_user(value, (uint32_t *)buf); - if (r) { - result = r; - goto end; - } - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - -end: - if (use_bank) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - } - - if (pm_pg_lock) - mutex_unlock(&adev->pm.mutex); - - return result; -} - -static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - bool pm_pg_lock, use_bank; - unsigned instance_bank, sh_bank, se_bank; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - /* are we reading registers for which a PG lock is necessary? */ - pm_pg_lock = (*pos >> 23) & 1; - - if (*pos & (1ULL << 62)) { - se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24; - sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34; - instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44; - - if (se_bank == 0x3FF) - se_bank = 0xFFFFFFFF; - if (sh_bank == 0x3FF) - sh_bank = 0xFFFFFFFF; - if (instance_bank == 0x3FF) - instance_bank = 0xFFFFFFFF; - use_bank = 1; - } else { - use_bank = 0; - } - - *pos &= (1UL << 22) - 1; - - if (use_bank) { - if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || - (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) - return -EINVAL; - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se_bank, - sh_bank, instance_bank); - } - - if (pm_pg_lock) - mutex_lock(&adev->pm.mutex); - - while (size) { - uint32_t value; - - if (*pos > adev->rmmio_size) - return result; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32(*pos >> 2, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - if (use_bank) { - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); - } - - if (pm_pg_lock) - mutex_unlock(&adev->pm.mutex); - - return result; -} - -static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - value = RREG32_PCIE(*pos >> 2); - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = 
file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32_PCIE(*pos >> 2, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - value = RREG32_DIDT(*pos >> 2); - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32_DIDT(*pos >> 2, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - value = RREG32_SMC(*pos); - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - while (size) { - uint32_t value; - - r = get_user(value, (uint32_t *)buf); - if (r) - return r; - - WREG32_SMC(*pos, value); - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - ssize_t result = 0; - int r; - uint32_t *config, no_regs = 0; - - if (size & 0x3 || *pos & 0x3) - return -EINVAL; - - config = kmalloc_array(256, sizeof(*config), GFP_KERNEL); - if (!config) - return -ENOMEM; - - /* version, increment each time something is added */ - config[no_regs++] = 3; - config[no_regs++] = adev->gfx.config.max_shader_engines; - config[no_regs++] = adev->gfx.config.max_tile_pipes; - config[no_regs++] = adev->gfx.config.max_cu_per_sh; - config[no_regs++] = adev->gfx.config.max_sh_per_se; - config[no_regs++] = adev->gfx.config.max_backends_per_se; - config[no_regs++] = adev->gfx.config.max_texture_channel_caches; - config[no_regs++] = adev->gfx.config.max_gprs; - config[no_regs++] = adev->gfx.config.max_gs_threads; - config[no_regs++] = adev->gfx.config.max_hw_contexts; - config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend; - config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend; - config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size; - config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size; - config[no_regs++] = adev->gfx.config.num_tile_pipes; - config[no_regs++] = adev->gfx.config.backend_enable_mask; - 
config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes; - config[no_regs++] = adev->gfx.config.mem_row_size_in_kb; - config[no_regs++] = adev->gfx.config.shader_engine_tile_size; - config[no_regs++] = adev->gfx.config.num_gpus; - config[no_regs++] = adev->gfx.config.multi_gpu_tile_size; - config[no_regs++] = adev->gfx.config.mc_arb_ramcfg; - config[no_regs++] = adev->gfx.config.gb_addr_config; - config[no_regs++] = adev->gfx.config.num_rbs; - - /* rev==1 */ - config[no_regs++] = adev->rev_id; - config[no_regs++] = adev->pg_flags; - config[no_regs++] = adev->cg_flags; - - /* rev==2 */ - config[no_regs++] = adev->family; - config[no_regs++] = adev->external_rev_id; - - /* rev==3 */ - config[no_regs++] = adev->pdev->device; - config[no_regs++] = adev->pdev->revision; - config[no_regs++] = adev->pdev->subsystem_device; - config[no_regs++] = adev->pdev->subsystem_vendor; - - while (size && (*pos < no_regs * 4)) { - uint32_t value; - - value = config[*pos >> 2]; - r = put_user(value, (uint32_t *)buf); - if (r) { - kfree(config); - return r; - } - - result += 4; - buf += 4; - *pos += 4; - size -= 4; - } - - kfree(config); - return result; -} - -static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = file_inode(f)->i_private; - int idx, x, outsize, r, valuesize; - uint32_t values[16]; - - if (size & 3 || *pos & 0x3) - return -EINVAL; - - if (amdgpu_dpm == 0) - return -EINVAL; - - /* convert offset to sensor number */ - idx = *pos >> 2; - - valuesize = sizeof(values); - if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) - r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); - else - return -EINVAL; - - if (size > valuesize) - return -EINVAL; - - outsize = 0; - x = 0; - if (!r) { - while (size) { - r = put_user(values[x++], (int32_t *)buf); - buf += 4; - size -= 4; - outsize += 4; - } - } - - return !r ? 
outsize : r; -} - -static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = f->f_inode->i_private; - int r, x; - ssize_t result=0; - uint32_t offset, se, sh, cu, wave, simd, data[32]; - - if (size & 3 || *pos & 3) - return -EINVAL; - - /* decode offset */ - offset = (*pos & GENMASK_ULL(6, 0)); - se = (*pos & GENMASK_ULL(14, 7)) >> 7; - sh = (*pos & GENMASK_ULL(22, 15)) >> 15; - cu = (*pos & GENMASK_ULL(30, 23)) >> 23; - wave = (*pos & GENMASK_ULL(36, 31)) >> 31; - simd = (*pos & GENMASK_ULL(44, 37)) >> 37; - - /* switch to the specific se/sh/cu */ - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); - - x = 0; - if (adev->gfx.funcs->read_wave_data) - adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x); - - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - mutex_unlock(&adev->grbm_idx_mutex); - - if (!x) - return -EINVAL; - - while (size && (offset < x * 4)) { - uint32_t value; - - value = data[offset >> 2]; - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - - result += 4; - buf += 4; - offset += 4; - size -= 4; - } - - return result; -} - -static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) -{ - struct amdgpu_device *adev = f->f_inode->i_private; - int r; - ssize_t result = 0; - uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; - - if (size & 3 || *pos & 3) - return -EINVAL; - - /* decode offset */ - offset = *pos & GENMASK_ULL(11, 0); - se = (*pos & GENMASK_ULL(19, 12)) >> 12; - sh = (*pos & GENMASK_ULL(27, 20)) >> 20; - cu = (*pos & GENMASK_ULL(35, 28)) >> 28; - wave = (*pos & GENMASK_ULL(43, 36)) >> 36; - simd = (*pos & GENMASK_ULL(51, 44)) >> 44; - thread = (*pos & GENMASK_ULL(59, 52)) >> 52; - bank = (*pos & GENMASK_ULL(61, 60)) >> 60; - - data = kmalloc_array(1024, sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - /* switch to the specific se/sh/cu */ - mutex_lock(&adev->grbm_idx_mutex); - amdgpu_gfx_select_se_sh(adev, se, sh, cu); - - if (bank == 0) { - if (adev->gfx.funcs->read_wave_vgprs) - adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data); - } else { - if (adev->gfx.funcs->read_wave_sgprs) - adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data); - } - - amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - mutex_unlock(&adev->grbm_idx_mutex); - - while (size) { - uint32_t value; - - value = data[offset++]; - r = put_user(value, (uint32_t *)buf); - if (r) { - result = r; - goto err; - } - - result += 4; - buf += 4; - size -= 4; - } - -err: - kfree(data); - return result; -} - -static const struct file_operations amdgpu_debugfs_regs_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_read, - .write = amdgpu_debugfs_regs_write, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_regs_didt_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_didt_read, - .write = amdgpu_debugfs_regs_didt_write, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_pcie_read, - .write = amdgpu_debugfs_regs_pcie_write, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_regs_smc_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_regs_smc_read, - .write = amdgpu_debugfs_regs_smc_write, - .llseek = default_llseek -}; - -static const 
struct file_operations amdgpu_debugfs_gca_config_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_gca_config_read, - .llseek = default_llseek -}; - -static const struct file_operations amdgpu_debugfs_sensors_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_sensor_read, - .llseek = default_llseek -}; - -static const struct file_operations amdgpu_debugfs_wave_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_wave_read, - .llseek = default_llseek -}; -static const struct file_operations amdgpu_debugfs_gpr_fops = { - .owner = THIS_MODULE, - .read = amdgpu_debugfs_gpr_read, - .llseek = default_llseek -}; - -static const struct file_operations *debugfs_regs[] = { - &amdgpu_debugfs_regs_fops, - &amdgpu_debugfs_regs_didt_fops, - &amdgpu_debugfs_regs_pcie_fops, - &amdgpu_debugfs_regs_smc_fops, - &amdgpu_debugfs_gca_config_fops, - &amdgpu_debugfs_sensors_fops, - &amdgpu_debugfs_wave_fops, - &amdgpu_debugfs_gpr_fops, -}; - -static const char *debugfs_regs_names[] = { - "amdgpu_regs", - "amdgpu_regs_didt", - "amdgpu_regs_pcie", - "amdgpu_regs_smc", - "amdgpu_gca_config", - "amdgpu_sensors", - "amdgpu_wave", - "amdgpu_gpr", -}; - -static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) -{ - struct drm_minor *minor = adev->ddev->primary; - struct dentry *ent, *root = minor->debugfs_root; - unsigned i, j; - - for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { - ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | S_IRUGO, root, - adev, debugfs_regs[i]); - if (IS_ERR(ent)) { - for (j = 0; j < i; j++) { - debugfs_remove(adev->debugfs_regs[i]); - adev->debugfs_regs[i] = NULL; - } - return PTR_ERR(ent); - } - - if (!i) - i_size_write(ent->d_inode, adev->rmmio_size); - adev->debugfs_regs[i] = ent; - } - - return 0; -} - -static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) -{ - unsigned i; - - for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { - if (adev->debugfs_regs[i]) { - debugfs_remove(adev->debugfs_regs[i]); - adev->debugfs_regs[i] = NULL; - } - } -} - -static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) -{ - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct amdgpu_device *adev = dev->dev_private; - int r = 0, i; - - /* hold on the scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - kthread_park(ring->sched.thread); - } - - seq_printf(m, "run ib test:\n"); - r = amdgpu_ib_ring_tests(adev); - if (r) - seq_printf(m, "ib ring tests failed (%d).\n", r); - else - seq_printf(m, "ib ring tests passed.\n"); - - /* go on the scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - kthread_unpark(ring->sched.thread); - } - - return 0; -} - -static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = { - {"amdgpu_test_ib", &amdgpu_debugfs_test_ib} -}; - -static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) -{ - return amdgpu_debugfs_add_files(adev, - amdgpu_debugfs_test_ib_ring_list, 1); -} - -int amdgpu_debugfs_init(struct drm_minor *minor) -{ - return 0; -} - -static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data) -{ - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; - struct amdgpu_device *adev = dev->dev_private; - - seq_write(m, adev->bios, adev->bios_size); - return 0; -} 
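-
-/* For reference, userspace consumes this dump with a plain read of the
- * debugfs node, e.g. (path assumes DRM minor 0):
- *   dd if=/sys/kernel/debug/dri/0/amdgpu_vbios of=vbios.rom
- */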
- -static const struct drm_info_list amdgpu_vbios_dump_list[] = { - {"amdgpu_vbios", - amdgpu_debugfs_get_vbios_dump, - 0, NULL}, -}; - -static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev) -{ - return amdgpu_debugfs_add_files(adev, - amdgpu_vbios_dump_list, 1); -} -#else -static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) -{ - return 0; -} -static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) -{ - return 0; -} -static int amdgpu_debugfs_vbios_dump_init(struct amdgpu_device *adev) -{ - return 0; -} -static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { } -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 138beb550a58..38d47559f098 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -34,6 +34,7 @@ #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_fb_helper.h> static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) { @@ -556,15 +557,9 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, return &amdgpu_fb->base; } -void amdgpu_output_poll_changed(struct drm_device *dev) -{ - struct amdgpu_device *adev = dev->dev_private; - amdgpu_fb_output_poll_changed(adev); -} - const struct drm_mode_config_funcs amdgpu_mode_funcs = { .fb_create = amdgpu_user_framebuffer_create, - .output_poll_changed = amdgpu_output_poll_changed + .output_poll_changed = drm_fb_helper_output_poll_changed, }; static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 3cc0ef0c055e..0bcb6c6e0ca9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -25,9 +25,7 @@ struct drm_framebuffer * amdgpu_user_framebuffer_create(struct drm_device *dev, - struct drm_file *file_priv, - const struct drm_mode_fb_cmd2 *mode_cmd); - -void amdgpu_output_poll_changed(struct drm_device *dev); + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 56caaeee6fea..a8437a3296a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -360,6 +360,12 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ (adev)->powerplay.pp_handle, msg_id)) +#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \ + virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \ + ((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \ + (adev)->powerplay.pp_handle, virtual_addr_low, \ + virtual_addr_hi, mc_addr_low, mc_addr_hi, size) + struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index c2f414ffb2cc..50afcf65181a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -90,7 +90,7 @@ int amdgpu_disp_priority = 0; int amdgpu_hw_i2c = 0; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; -int amdgpu_lockup_timeout = 0; +int amdgpu_lockup_timeout = 10000; int amdgpu_dpm = -1; int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; @@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0; int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; +int 
amdgpu_gpu_recovery = -1; /* auto */ MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -165,7 +166,7 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444); MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(msi, amdgpu_msi, int, 0444); -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default 0 = disable)"); +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)"); module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); @@ -216,7 +217,7 @@ module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))"); module_param_named(dc, amdgpu_dc, int, 0444); -MODULE_PARM_DESC(dc, "Display Core Log Level (0 = minimal (default), 1 = chatty"); +MODULE_PARM_DESC(dc_log, "Display Core Log Level (0 = minimal (default), 1 = chatty)"); module_param_named(dc_log, amdgpu_dc_log, int, 0444); MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)"); @@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism (1 = enable, 0 = disable, -1 = auto)"); +module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); + #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -306,7 +310,6 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled) module_param_named(cik_support, amdgpu_cik_support, int, 0444); #endif - static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -566,12 +569,13 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev) return 0; } + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct drm_device *dev; unsigned long flags = ent->driver_data; - int ret; + int ret, retry = 0; if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { DRM_INFO("This hardware requires experimental hardware support.\n" @@ -604,8 +608,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); +retry_init: ret = drm_dev_register(dev, ent->driver_data); - if (ret) + if (ret == -EAGAIN && ++retry <= 3) { + DRM_INFO("retry init %d\n", retry); + /* Don't request EX mode too frequently; rapid retries look like an attack to the host */ + msleep(5000); + goto retry_init; + } else if (ret) goto err_pci; return 0; @@ -639,7 +649,7 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) * unfortunately we can't detect certain * hypervisors so just do this all the time.
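 * (suspending the IP blocks is enough to quiesce the hardware for that case)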
*/ - amdgpu_suspend(adev); + amdgpu_device_ip_suspend(adev); } static int amdgpu_pmops_suspend(struct device *dev) @@ -844,9 +854,6 @@ static struct drm_driver kms_driver = { .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, .get_scanout_position = amdgpu_get_crtc_scanout_position, -#if defined(CONFIG_DEBUG_FS) - .debugfs_init = amdgpu_debugfs_init, -#endif .irq_preinstall = amdgpu_irq_preinstall, .irq_postinstall = amdgpu_irq_postinstall, .irq_uninstall = amdgpu_irq_uninstall, @@ -906,10 +913,6 @@ static int __init amdgpu_init(void) if (r) goto error_fence; - r = amd_sched_fence_slab_init(); - if (r) - goto error_sched; - if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); return -EINVAL; @@ -922,9 +925,6 @@ static int __init amdgpu_init(void) /* let modprobe override vga console setting */ return pci_register_driver(pdriver); -error_sched: - amdgpu_fence_slab_fini(); - error_fence: amdgpu_sync_fini(); @@ -938,7 +938,6 @@ static void __exit amdgpu_exit(void) pci_unregister_driver(pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); - amd_sched_fence_slab_fini(); amdgpu_fence_slab_fini(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 90fa8e8bc6fb..ff3e9beb7d19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -283,12 +283,6 @@ out: return ret; } -void amdgpu_fb_output_poll_changed(struct amdgpu_device *adev) -{ - if (adev->mode_info.rfbdev) - drm_fb_helper_hotplug_event(&adev->mode_info.rfbdev->helper); -} - static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev) { struct amdgpu_framebuffer *rfb = &rfbdev->rfb; @@ -393,24 +387,3 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) return true; return false; } - -void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev) -{ - struct amdgpu_fbdev *afbdev; - struct drm_fb_helper *fb_helper; - int ret; - - if (!adev) - return; - - afbdev = adev->mode_info.rfbdev; - - if (!afbdev) - return; - - fb_helper = &afbdev->helper; - - ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper); - if (ret) - DRM_DEBUG("failed to restore crtc mode\n"); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 2fa95aef74d5..008e1984b7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -187,7 +187,7 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) seq = ++ring->fence_drv.sync_seq; amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - seq, AMDGPU_FENCE_FLAG_INT); + seq, 0); *s = seq; @@ -391,9 +391,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; - dev_info(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, " - "cpu addr 0x%p\n", ring->idx, - ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); + dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, " + "cpu addr 0x%p\n", ring->idx, + ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); return 0; } @@ -410,7 +410,6 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission) { - long timeout; int r; /* Check that num_hw_submission is a power of two */ @@ -434,20 +433,9 @@ int amdgpu_fence_driver_init_ring(struct 
amdgpu_ring *ring, /* No need to setup the GPU scheduler for KIQ ring */ if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) { - timeout = msecs_to_jiffies(amdgpu_lockup_timeout); - if (timeout == 0) { - /* - * FIXME: - * Delayed workqueue cannot use it directly, - * so the scheduler will not use delayed workqueue if - * MAX_SCHEDULE_TIMEOUT is set. - * Currently keep it simple and silly. - */ - timeout = MAX_SCHEDULE_TIMEOUT; - } - r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, - num_hw_submission, - timeout, ring->name); + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, + num_hw_submission, amdgpu_job_hang_limit, + msecs_to_jiffies(amdgpu_lockup_timeout), ring->name); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", ring->name); @@ -499,11 +487,11 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) r = amdgpu_fence_wait_empty(ring); if (r) { /* no need to trigger GPU reset as we are unloading */ - amdgpu_fence_driver_force_completion(adev); + amdgpu_fence_driver_force_completion(ring); } amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); - amd_sched_fini(&ring->sched); + drm_sched_fini(&ring->sched); del_timer_sync(&ring->fence_drv.fallback_timer); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) dma_fence_put(ring->fence_drv.fences[j]); @@ -534,7 +522,7 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) r = amdgpu_fence_wait_empty(ring); if (r) { /* delay GPU reset to resume */ - amdgpu_fence_driver_force_completion(adev); + amdgpu_fence_driver_force_completion(ring); } /* disable the interrupt */ @@ -571,30 +559,15 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev) } /** - * amdgpu_fence_driver_force_completion - force all fence waiter to complete + * amdgpu_fence_driver_force_completion - force signal latest fence of ring * - * @adev: amdgpu device pointer + * @ring: fence of the ring to signal * - * In case of GPU reset failure make sure no process keep waiting on fence - * that will never complete. */ -void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) +void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) { - int i; - - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->fence_drv.initialized) - continue; - - amdgpu_fence_write(ring, ring->fence_drv.sync_seq); - } -} - -void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring) -{ - if (ring) - amdgpu_fence_write(ring, ring->fence_drv.sync_seq); + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); + amdgpu_fence_process(ring); } /* @@ -709,25 +682,25 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) } /** - * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset + * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover * * Manually trigger a gpu reset at the next fence wait. 
*/ -static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) +static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - seq_printf(m, "gpu reset\n"); - amdgpu_gpu_reset(adev); + seq_printf(m, "gpu recover\n"); + amdgpu_device_gpu_recover(adev, NULL, true); return 0; } static const struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, - {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} + {"amdgpu_gpu_recover", &amdgpu_debugfs_gpu_recover, 0, NULL} }; static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index fe818501c520..0a4f34afaaaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -57,60 +57,48 @@ */ /** - * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table + * amdgpu_gart_dummy_page_init - init dummy page used by the driver * * @adev: amdgpu_device pointer * - * Allocate system memory for GART page table - * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the - * gart table to be in system memory. - * Returns 0 for success, -ENOMEM for failure. + * Allocate the dummy page used by the driver (all asics). + * This dummy page is used by the driver as a filler for gart entries + * when pages are taken out of the GART. + * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) +static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) { - void *ptr; - - ptr = pci_alloc_consistent(adev->pdev, adev->gart.table_size, - &adev->gart.table_addr); - if (ptr == NULL) { + if (adev->dummy_page.page) + return 0; + adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); + if (adev->dummy_page.page == NULL) + return -ENOMEM; + adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page, + 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) { + dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); + __free_page(adev->dummy_page.page); + adev->dummy_page.page = NULL; return -ENOMEM; } -#ifdef CONFIG_X86 - if (0) { - set_memory_uc((unsigned long)ptr, - adev->gart.table_size >> PAGE_SHIFT); - } -#endif - adev->gart.ptr = ptr; - memset((void *)adev->gart.ptr, 0, adev->gart.table_size); return 0; } /** - * amdgpu_gart_table_ram_free - free system ram for gart page table + * amdgpu_gart_dummy_page_fini - free dummy page used by the driver * * @adev: amdgpu_device pointer * - * Free system memory for GART page table - * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the - * gart table to be in system memory. + * Frees the dummy page used by the driver (all asics).
*/ -void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) +static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { - if (adev->gart.ptr == NULL) { + if (adev->dummy_page.page == NULL) return; - } -#ifdef CONFIG_X86 - if (0) { - set_memory_wb((unsigned long)adev->gart.ptr, - adev->gart.table_size >> PAGE_SHIFT); - } -#endif - pci_free_consistent(adev->pdev, adev->gart.table_size, - (void *)adev->gart.ptr, - adev->gart.table_addr); - adev->gart.ptr = NULL; - adev->gart.table_addr = 0; + pci_unmap_page(adev->pdev, adev->dummy_page.addr, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + __free_page(adev->dummy_page.page); + adev->dummy_page.page = NULL; } /** @@ -365,7 +353,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev) DRM_ERROR("Page size is smaller than GPU page size!\n"); return -EINVAL; } - r = amdgpu_dummy_page_init(adev); + r = amdgpu_gart_dummy_page_init(adev); if (r) return r; /* Compute table size */ @@ -377,10 +365,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev) #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS /* Allocate pages table */ adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages); - if (adev->gart.pages == NULL) { - amdgpu_gart_fini(adev); + if (adev->gart.pages == NULL) return -ENOMEM; - } #endif return 0; @@ -395,14 +381,9 @@ int amdgpu_gart_init(struct amdgpu_device *adev) */ void amdgpu_gart_fini(struct amdgpu_device *adev) { - if (adev->gart.ready) { - /* unbind pages */ - amdgpu_gart_unbind(adev, 0, adev->gart.num_cpu_pages); - } - adev->gart.ready = false; #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS vfree(adev->gart.pages); adev->gart.pages = NULL; #endif - amdgpu_dummy_page_fini(adev); + amdgpu_gart_dummy_page_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index afbe803b1a13..d4a43302c2be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -39,7 +39,7 @@ struct amdgpu_gart_funcs; #define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK) struct amdgpu_gart { - dma_addr_t table_addr; + u64 table_addr; struct amdgpu_bo *robj; void *ptr; unsigned num_gpu_pages; @@ -56,8 +56,6 @@ struct amdgpu_gart { const struct amdgpu_gart_funcs *gart_funcs; }; -int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); -void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index e87eedcc0da9..e48b4ec88c8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -72,7 +72,7 @@ retry: initial_domain |= AMDGPU_GEM_DOMAIN_GTT; goto retry; } - DRM_ERROR("Failed to allocate GEM object (%ld, %d, %u, %d)\n", + DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n", size, initial_domain, alignment, r); } return r; @@ -282,6 +282,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_gem_userptr *args = data; struct drm_gem_object *gobj; @@ -335,7 +336,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto free_pages; amdgpu_ttm_placement_from_domain(bo, 
AMDGPU_GEM_DOMAIN_GTT); - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) goto free_pages; @@ -517,10 +518,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (!amdgpu_vm_ready(vm)) return; - r = amdgpu_vm_update_directories(adev, vm); - if (r) - goto error; - r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) goto error; @@ -529,6 +526,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, operation == AMDGPU_VA_OP_REPLACE) r = amdgpu_vm_bo_update(adev, bo_va, false); + r = amdgpu_vm_update_directories(adev, vm); + if (r) + goto error; + error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); @@ -557,14 +558,25 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, int r = 0; if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { - dev_err(&dev->pdev->dev, + dev_dbg(&dev->pdev->dev, "va_address 0x%LX is in reserved area 0x%LX\n", args->va_address, AMDGPU_VA_RESERVED_SIZE); return -EINVAL; } + if (args->va_address >= AMDGPU_VA_HOLE_START && + args->va_address < AMDGPU_VA_HOLE_END) { + dev_dbg(&dev->pdev->dev, + "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n", + args->va_address, AMDGPU_VA_HOLE_START, + AMDGPU_VA_HOLE_END); + return -EINVAL; + } + + args->va_address &= AMDGPU_VA_HOLE_MASK; + if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { - dev_err(&dev->pdev->dev, "invalid flags combination 0x%08X\n", + dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n", args->flags); return -EINVAL; } @@ -576,7 +588,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, case AMDGPU_VA_OP_REPLACE: break; default: - dev_err(&dev->pdev->dev, "unsupported operation %d\n", + dev_dbg(&dev->pdev->dev, "unsupported operation %d\n", args->operation); return -EINVAL; } @@ -839,7 +851,7 @@ static const struct drm_info_list amdgpu_debugfs_gem_list[] = { }; #endif -int amdgpu_gem_debugfs_init(struct amdgpu_device *adev) +int amdgpu_debugfs_gem_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_gem_list, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ef043361009f..bb40d2529a30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -203,7 +203,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, spin_lock_init(&kiq->ring_lock); - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs); if (r) return r; @@ -229,7 +229,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 00e0ce10862f..e14ab34d8262 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -31,6 +31,11 @@ struct amdgpu_gtt_mgr { atomic64_t available; }; +struct amdgpu_gtt_node { + struct drm_mm_node node; + struct ttm_buffer_object *tbo; +}; + /** * amdgpu_gtt_mgr_init - init GTT manager and DRM MM * @@ -79,17 +84,17 @@ static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) } /** - * amdgpu_gtt_mgr_is_allocated - Check 
if mem has address space + * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space * * @mem: the mem object to check * * Check if a mem object has already address space allocated. */ -bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem) +bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem) { - struct drm_mm_node *node = mem->mm_node; + struct amdgpu_gtt_node *node = mem->mm_node; - return (node->start != AMDGPU_BO_INVALID_OFFSET); + return (node->node.start != AMDGPU_BO_INVALID_OFFSET); } /** @@ -109,12 +114,12 @@ static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, { struct amdgpu_device *adev = amdgpu_ttm_adev(man->bdev); struct amdgpu_gtt_mgr *mgr = man->priv; - struct drm_mm_node *node = mem->mm_node; + struct amdgpu_gtt_node *node = mem->mm_node; enum drm_mm_insert_mode mode; unsigned long fpfn, lpfn; int r; - if (amdgpu_gtt_mgr_is_allocated(mem)) + if (amdgpu_gtt_mgr_has_gart_addr(mem)) return 0; if (place) @@ -132,13 +137,13 @@ static int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, mode = DRM_MM_INSERT_HIGH; spin_lock(&mgr->lock); - r = drm_mm_insert_node_in_range(&mgr->mm, node, - mem->num_pages, mem->page_alignment, 0, - fpfn, lpfn, mode); + r = drm_mm_insert_node_in_range(&mgr->mm, &node->node, mem->num_pages, + mem->page_alignment, 0, fpfn, lpfn, + mode); spin_unlock(&mgr->lock); if (!r) - mem->start = node->start; + mem->start = node->node.start; return r; } @@ -159,7 +164,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { struct amdgpu_gtt_mgr *mgr = man->priv; - struct drm_mm_node *node; + struct amdgpu_gtt_node *node; int r; spin_lock(&mgr->lock); @@ -177,8 +182,9 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, goto err_out; } - node->start = AMDGPU_BO_INVALID_OFFSET; - node->size = mem->num_pages; + node->node.start = AMDGPU_BO_INVALID_OFFSET; + node->node.size = mem->num_pages; + node->tbo = tbo; mem->mm_node = node; if (place->fpfn || place->lpfn || place->flags & TTM_PL_FLAG_TOPDOWN) { @@ -190,7 +196,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, goto err_out; } } else { - mem->start = node->start; + mem->start = node->node.start; } return 0; @@ -214,14 +220,14 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) { struct amdgpu_gtt_mgr *mgr = man->priv; - struct drm_mm_node *node = mem->mm_node; + struct amdgpu_gtt_node *node = mem->mm_node; if (!node) return; spin_lock(&mgr->lock); - if (node->start != AMDGPU_BO_INVALID_OFFSET) - drm_mm_remove_node(node); + if (node->node.start != AMDGPU_BO_INVALID_OFFSET) + drm_mm_remove_node(&node->node); spin_unlock(&mgr->lock); atomic64_add(mem->num_pages, &mgr->available); @@ -244,6 +250,25 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) return (result > 0 ? 
result : 0) * PAGE_SIZE; } +int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + struct amdgpu_gtt_node *node; + struct drm_mm_node *mm_node; + int r = 0; + + spin_lock(&mgr->lock); + drm_mm_for_each_node(mm_node, &mgr->mm) { + node = container_of(mm_node, struct amdgpu_gtt_node, node); + r = amdgpu_ttm_recover_gart(node->tbo); + if (r) + break; + } + spin_unlock(&mgr->lock); + + return r; +} + /** * amdgpu_gtt_mgr_debug - dump VRAM table * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 659997bfff30..a162d87ca0c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -149,7 +149,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } - if (vm && !job->vm_id) { + if (vm && !job->vmid) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } @@ -164,7 +164,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } if (ring->funcs->emit_pipeline_sync && job && - ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || + ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; dma_fence_put(tmp); @@ -211,7 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; - amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, + amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0, need_ctx_switch); need_ctx_switch = false; } @@ -229,9 +229,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, r = amdgpu_fence_emit(ring, f); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); - if (job && job->vm_id) - amdgpu_vm_reset_id(adev, ring->funcs->vmhub, - job->vm_id); + if (job && job->vmid) + amdgpu_vmid_reset(adev, ring->funcs->vmhub, job->vmid); amdgpu_ring_undo(ring); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c new file mode 100644 index 000000000000..16884a0b677b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -0,0 +1,459 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu_ids.h" + +#include <linux/idr.h> +#include <linux/dma-fence-array.h> +#include <drm/drmP.h> + +#include "amdgpu.h" +#include "amdgpu_trace.h" + +/* + * PASID manager + * + * PASIDs are global address space identifiers that can be shared + * between the GPU, an IOMMU and the driver. VMs on different devices + * may use the same PASID if they share the same address + * space. Therefore PASIDs are allocated using a global IDA. VMs are + * looked up from the PASID per amdgpu_device. + */ +static DEFINE_IDA(amdgpu_pasid_ida); + +/** + * amdgpu_pasid_alloc - Allocate a PASID + * @bits: Maximum width of the PASID in bits, must be at least 1 + * + * Allocates a PASID of the given width while keeping smaller PASIDs + * available if possible. + * + * Returns a positive integer on success. Returns %-EINVAL if @bits == 0. + * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on + * memory allocation failure. + */ +int amdgpu_pasid_alloc(unsigned int bits) +{ + int pasid = -EINVAL; + + for (bits = min(bits, 31U); bits > 0; bits--) { + pasid = ida_simple_get(&amdgpu_pasid_ida, + 1U << (bits - 1), 1U << bits, + GFP_KERNEL); + if (pasid != -ENOSPC) + break; + } + + return pasid; +} + +/** + * amdgpu_pasid_free - Free a PASID + * @pasid: PASID to free + */ +void amdgpu_pasid_free(unsigned int pasid) +{ + ida_simple_remove(&amdgpu_pasid_ida, pasid); +}
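The PASID allocator above counts the width down from the requested maximum, always probing the range [2^(bits-1), 2^bits) first, so narrow PASIDs stay free for devices that can only address a few bits. A standalone sketch of the same IDA fallback pattern — example_ida and example_alloc are illustrative names, not part of this patch:

#include <linux/idr.h>
#include <linux/kernel.h>

static DEFINE_IDA(example_ida);

/* Allocate an ID of at most @bits bits, preferring the widest
 * permitted range and falling back to narrower ranges only when the
 * wider ones are exhausted, as amdgpu_pasid_alloc() does above. */
static int example_alloc(unsigned int bits)
{
	int id = -EINVAL;

	for (bits = min(bits, 31U); bits > 0; bits--) {
		/* [2^(bits-1), 2^bits) holds the IDs that are exactly @bits wide */
		id = ida_simple_get(&example_ida, 1U << (bits - 1),
				    1U << bits, GFP_KERNEL);
		if (id != -ENOSPC)
			break;
	}
	return id;
}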
+ +/* + * VMID manager + * + * VMIDs are per-VMHUB identifiers for page table handling. + */ + +/** + * amdgpu_vmid_had_gpu_reset - check if reset occurred since last use + * + * @adev: amdgpu_device pointer + * @id: VMID structure + * + * Check if GPU reset occurred since last use of the VMID. + */ +bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, + struct amdgpu_vmid *id) +{ + return id->current_gpu_reset_count != + atomic_read(&adev->gpu_reset_counter); +} + +/* id_mgr->lock must be held */ +static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct amdgpu_vmid *id = vm->reserved_vmid[vmhub]; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct dma_fence *updates = sync->last_vm_update; + int r = 0; + struct dma_fence *flushed, *tmp; + bool needs_flush = vm->use_cpu_for_update; + + flushed = id->flushed_updates; + if ((amdgpu_vmid_had_gpu_reset(adev, id)) || + (atomic64_read(&id->owner) != vm->entity.fence_context) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) || + (!id->last_flush || (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush)))) { + needs_flush = true; + /* to prevent one context being starved by another */ + id->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&id->active, ring); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp, false); + return r; + } + } + + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + if (r) + goto out; + + if (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + id->pd_gpu_addr = job->vm_pd_addr; + atomic64_set(&id->owner, vm->entity.fence_context); + job->vm_needs_flush = needs_flush; + if (needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } + job->vmid = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); +out: + return r; +} + +/** + * amdgpu_vmid_grab - allocate the next free VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * + * Allocate an id for the vm, adding fences to the sync obj as necessary. + */ +int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync, struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + uint64_t fence_context = adev->fence_context + ring->idx; + struct dma_fence *updates = sync->last_vm_update; + struct amdgpu_vmid *id, *idle; + struct dma_fence **fences; + unsigned i; + int r = 0; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job); + mutex_unlock(&id_mgr->lock); + return r; + } + fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); + if (!fences) { + mutex_unlock(&id_mgr->lock); + return -ENOMEM; + } + /* Check if we have an idle VMID */ + i = 0; + list_for_each_entry(idle, &id_mgr->ids_lru, list) { + fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); + if (!fences[i]) + break; + ++i; + } + + /* If we can't find an idle VMID to use, wait till one becomes available */ + if (&idle->list == &id_mgr->ids_lru) { + u64 fence_context = adev->vm_manager.fence_context + ring->idx; + unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; + struct dma_fence_array *array; + unsigned j; + + for (j = 0; j < i; ++j) + dma_fence_get(fences[j]); + + array = dma_fence_array_create(i, fences, fence_context, + seqno, true); + if (!array) { + for (j = 0; j < i; ++j) + dma_fence_put(fences[j]); + kfree(fences); + r = -ENOMEM; + goto error; + } + + + r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); + dma_fence_put(&array->base); + if (r) + goto error; + + mutex_unlock(&id_mgr->lock); + return 0; + + } + kfree(fences); + + job->vm_needs_flush = vm->use_cpu_for_update; + /* Check if we can use a VMID already assigned to this VM */ + list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { + struct dma_fence *flushed; + bool needs_flush = vm->use_cpu_for_update; + + /* Check all the prerequisites to using this VMID */ + if (amdgpu_vmid_had_gpu_reset(adev, id)) + continue; + + if (atomic64_read(&id->owner) != vm->entity.fence_context) + continue; + + if (job->vm_pd_addr != id->pd_gpu_addr) + continue; + + if (!id->last_flush || + (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush))) + needs_flush = true; + + flushed = id->flushed_updates; + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) + needs_flush = true; + + /* Concurrent flushes are only possible starting with Vega10 */ + if (adev->asic_type < CHIP_VEGA10 && needs_flush) + continue; +
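+		/* Every reuse check above has passed: no GPU reset since
+		 * this VMID was last used, the VM still owns it, the page
+		 * directory address is unchanged, and any flush that is
+		 * still required is legal on this ASIC. */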
+ /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + if (r) + goto error; + + if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + + if (needs_flush) + goto needs_flush; + else + goto no_flush_needed; + + } + + /* Still no ID to use? Then use the idle one found earlier */ + id = idle; + + /* Remember this submission as user of the VMID */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + if (r) + goto error; + + id->pd_gpu_addr = job->vm_pd_addr; + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + atomic64_set(&id->owner, vm->entity.fence_context); + +needs_flush: + job->vm_needs_flush = true; + dma_fence_put(id->last_flush); + id->last_flush = NULL; + +no_flush_needed: + list_move_tail(&id->list, &id_mgr->ids_lru); + + job->vmid = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); + +error: + mutex_unlock(&id_mgr->lock); + return r; +} + +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vmid_mgr *id_mgr; + struct amdgpu_vmid *idle; + int r = 0; + + id_mgr = &adev->vm_manager.id_mgr[vmhub]; + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (atomic_inc_return(&id_mgr->reserved_vmid_num) > + AMDGPU_VM_MAX_RESERVED_VMID) { + DRM_ERROR("Over the reserved VMID limit\n"); + atomic_dec(&id_mgr->reserved_vmid_num); + r = -EINVAL; + goto unlock; + } + /* Select the first idle VMID from the LRU list */ + idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); + list_del_init(&idle->list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(&id_mgr->lock); + + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; +} + +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + atomic_dec(&id_mgr->reserved_vmid_num); + } + mutex_unlock(&id_mgr->lock); +}
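amdgpu_vmid_alloc_reserved() and amdgpu_vmid_free_reserved() above pin one VMID of a hub to a single VM. A rough usage sketch — the caller below is hypothetical; in the tree this pair is typically driven from the VM ioctl path:

	/* Reserve a dedicated VMID on the GFX hub for this VM ... */
	r = amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB);
	if (r)
		return r;

	/* ... submissions from this VM now take the fast path through
	 * amdgpu_vmid_grab_reserved_locked() ... */

	/* ... and the VMID goes back onto the LRU when it is released */
	amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB);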
+ +/** + * amdgpu_vmid_reset - reset VMID to zero + * + * @adev: amdgpu device structure + * @vmhub: VMHUB the VMID belongs to + * @vmid: vmid number to use + * + * Reset saved GDS, GWS and OA to force switch on next flush. + */ +void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, + unsigned vmid) +{ + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id = &id_mgr->ids[vmid]; + + atomic64_set(&id->owner, 0); + id->gds_base = 0; + id->gds_size = 0; + id->gws_base = 0; + id->gws_size = 0; + id->oa_base = 0; + id->oa_size = 0; +} + +/** + * amdgpu_vmid_reset_all - reset all VMIDs to zero + * + * @adev: amdgpu device structure + * + * Reset all VMIDs to force a flush on next use + */ +void amdgpu_vmid_reset_all(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[i]; + + for (j = 1; j < id_mgr->num_ids; ++j) + amdgpu_vmid_reset(adev, i, j); + } +} + +/** + * amdgpu_vmid_mgr_init - init the VMID manager + * + * @adev: amdgpu_device pointer + * + * Initialize the VM manager structures + */ +void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[i]; + + mutex_init(&id_mgr->lock); + INIT_LIST_HEAD(&id_mgr->ids_lru); + atomic_set(&id_mgr->reserved_vmid_num, 0); + + /* skip over VMID 0, since it is the system VM */ + for (j = 1; j < id_mgr->num_ids; ++j) { + amdgpu_vmid_reset(adev, i, j); + amdgpu_sync_create(&id_mgr->ids[j].active); + list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); + } + } + + adev->vm_manager.fence_context = + dma_fence_context_alloc(AMDGPU_MAX_RINGS); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + adev->vm_manager.seqno[i] = 0; +} + +/** + * amdgpu_vmid_mgr_fini - cleanup VM manager + * + * @adev: amdgpu_device pointer + * + * Cleanup the VM manager and free resources. + */ +void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) +{ + unsigned i, j; + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[i]; + + mutex_destroy(&id_mgr->lock); + for (j = 0; j < AMDGPU_NUM_VMID; ++j) { + struct amdgpu_vmid *id = &id_mgr->ids[j]; + + amdgpu_sync_free(&id->active); + dma_fence_put(id->flushed_updates); + dma_fence_put(id->last_flush); + } + } +}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h new file mode 100644 index 000000000000..ad931fa570b3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -0,0 +1,91 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_IDS_H__ +#define __AMDGPU_IDS_H__ + +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/dma-fence.h> + +#include "amdgpu_sync.h" + +/* maximum number of VMIDs */ +#define AMDGPU_NUM_VMID 16 + +struct amdgpu_device; +struct amdgpu_vm; +struct amdgpu_ring; +struct amdgpu_sync; +struct amdgpu_job; + +struct amdgpu_vmid { + struct list_head list; + struct amdgpu_sync active; + struct dma_fence *last_flush; + atomic64_t owner; + + uint64_t pd_gpu_addr; + /* last flushed PD/PT update */ + struct dma_fence *flushed_updates; + + uint32_t current_gpu_reset_count; + + uint32_t gds_base; + uint32_t gds_size; + uint32_t gws_base; + uint32_t gws_size; + uint32_t oa_base; + uint32_t oa_size; +}; + +struct amdgpu_vmid_mgr { + struct mutex lock; + unsigned num_ids; + struct list_head ids_lru; + struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; + atomic_t reserved_vmid_num; +}; + +int amdgpu_pasid_alloc(unsigned int bits); +void amdgpu_pasid_free(unsigned int pasid); + +bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, + struct amdgpu_vmid *id); +int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub); +void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub); +int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync, struct dma_fence *fence, + struct amdgpu_job *job); +void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, + unsigned vmid); +void amdgpu_vmid_reset_all(struct amdgpu_device *adev); + +void amdgpu_vmid_mgr_init(struct amdgpu_device *adev); +void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f5f27e4f0f7f..06373d44b3da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -92,15 +92,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, } return 0; } else { - r = amdgpu_wb_get(adev, &adev->irq.ih.wptr_offs); + r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs); if (r) { dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &adev->irq.ih.rptr_offs); + r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs); if (r) { - amdgpu_wb_free(adev, adev->irq.ih.wptr_offs); + amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r); return r; } @@ -133,8 +133,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev) amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj, &adev->irq.ih.gpu_addr, (void **)&adev->irq.ih.ring); - amdgpu_wb_free(adev, adev->irq.ih.wptr_offs); - amdgpu_wb_free(adev, adev->irq.ih.rptr_offs); + amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); + amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index ada89358e220..29cf10927a92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -105,8 +105,8 @@ struct amdgpu_iv_entry { unsigned client_id; unsigned src_id; unsigned ring_id; - 
unsigned vm_id; - unsigned vm_id_src; + unsigned vmid; + unsigned vmid_src; uint64_t timestamp; unsigned timestamp_src; unsigned pas_id; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 47c5ce9807db..56bcd59c3399 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) reset_work); if (!amdgpu_sriov_vf(adev)) - amdgpu_gpu_reset(adev); + amdgpu_device_gpu_recover(adev, NULL, false); } /* Disable *all* interrupts */ @@ -232,7 +232,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) int ret = pci_enable_msi(adev->pdev); if (!ret) { adev->irq.msi_enabled = true; - dev_info(adev->dev, "amdgpu: using MSI.\n"); + dev_dbg(adev->dev, "amdgpu: using MSI.\n"); } } @@ -262,7 +262,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) return r; } - DRM_INFO("amdgpu: irq initialized.\n"); + DRM_DEBUG("amdgpu: irq initialized.\n"); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 0cfc68db575b..2bd56760c744 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -28,7 +28,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" -static void amdgpu_job_timedout(struct amd_sched_job *s_job) +static void amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); @@ -37,10 +37,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) atomic_read(&job->ring->fence_drv.last_seq), job->ring->fence_drv.sync_seq); - if (amdgpu_sriov_vf(job->adev)) - amdgpu_sriov_gpu_reset(job->adev, job); - else - amdgpu_gpu_reset(job->adev); + amdgpu_device_gpu_recover(job->adev, job, false); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -63,7 +60,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->num_ibs = num_ibs; amdgpu_sync_create(&(*job)->sync); - amdgpu_sync_create(&(*job)->dep_sync); amdgpu_sync_create(&(*job)->sched_sync); (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); @@ -100,14 +96,13 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) amdgpu_ib_free(job->adev, &job->ibs[i], f); } -static void amdgpu_job_free_cb(struct amd_sched_job *s_job) +static void amdgpu_job_free_cb(struct drm_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); - amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); + amdgpu_ring_priority_put(job->ring, s_job->s_priority); dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); - amdgpu_sync_free(&job->dep_sync); amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -118,13 +113,12 @@ void amdgpu_job_free(struct amdgpu_job *job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); - amdgpu_sync_free(&job->dep_sync); amdgpu_sync_free(&job->sched_sync); kfree(job); } int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, - struct amd_sched_entity *entity, void *owner, + struct drm_sched_entity *entity, void *owner, struct dma_fence **f) { int r; @@ -133,7 +127,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, if (!f) return -EINVAL; - r = amd_sched_job_init(&job->base, &ring->sched, entity, owner); + r = drm_sched_job_init(&job->base, &ring->sched, entity, owner); if (r) return r; @@ -141,46 +135,47 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, 
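 	/* Note: the finished fence is taken and handed back to the caller
 	 * before the job is pushed, so the submission can be waited on
 	 * even if the scheduler runs it immediately. */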
job->fence_ctx = entity->fence_context; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); - amdgpu_ring_priority_get(job->ring, - amd_sched_get_job_priority(&job->base)); - amd_sched_entity_push_job(&job->base); + amdgpu_ring_priority_get(job->ring, job->base.s_priority); + drm_sched_entity_push_job(&job->base, entity); return 0; } -static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) +static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) { struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; - - struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync); + bool explicit = false; int r; - - if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) { - r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); - if (r) - DRM_ERROR("Error adding fence to sync (%d)\n", r); + struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit); + + if (fence && explicit) { + if (drm_sched_dependency_optimized(fence, s_entity)) { + r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false); + if (r) + DRM_ERROR("Error adding fence to sync (%d)\n", r); + } } - if (!fence) - fence = amdgpu_sync_get_fence(&job->sync); - while (fence == NULL && vm && !job->vm_id) { + + while (fence == NULL && vm && !job->vmid) { struct amdgpu_ring *ring = job->ring; - r = amdgpu_vm_grab_id(vm, ring, &job->sync, - &job->base.s_fence->finished, - job); + r = amdgpu_vmid_grab(vm, ring, &job->sync, + &job->base.s_fence->finished, + job); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - fence = amdgpu_sync_get_fence(&job->sync); + fence = amdgpu_sync_get_fence(&job->sync, NULL); } return fence; } -static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) +static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) { - struct dma_fence *fence = NULL; + struct dma_fence *fence = NULL, *finished; struct amdgpu_device *adev; struct amdgpu_job *job; int r; @@ -190,15 +185,18 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); + finished = &job->base.s_fence->finished; adev = job->adev; BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - /* skip ib schedule when vram is lost */ - if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { - dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); - DRM_ERROR("Skip scheduling IBs!\n"); + + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) + dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */ + + if (finished->error < 0) { + DRM_INFO("Skip scheduling IBs!\n"); } else { r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); @@ -213,7 +211,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return fence; } -const struct amd_sched_backend_ops amdgpu_sched_ops = { +const struct drm_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, .timedout_job = amdgpu_job_timedout, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 720139e182a3..bd6e9a40f421 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -63,8 +63,6 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) pm_runtime_forbid(dev->dev); } - amdgpu_amdkfd_device_fini(adev); - 
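 	/* amdkfd probe/init/fini no longer hang off the KMS load and
 	 * unload paths; this series moves them into the device-level
 	 * init and fini sequence (hunks not shown in this excerpt). */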
amdgpu_acpi_fini(adev); amdgpu_device_fini(adev); @@ -159,9 +157,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - amdgpu_amdkfd_device_probe(adev); - amdgpu_amdkfd_device_init(adev); - if (amdgpu_device_is_px(dev)) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); @@ -171,9 +166,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_release_full_gpu(adev, true); - out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ @@ -558,6 +550,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device dev_info = {}; + uint64_t vm_size; dev_info.device_id = dev->pdev->device; dev_info.chip_rev = adev->rev_id; @@ -585,8 +578,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; if (amdgpu_sriov_vf(adev)) dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; + + vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; - dev_info.virtual_address_max = (uint64_t)adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; + dev_info.virtual_address_max = + min(vm_size, AMDGPU_VA_HOLE_START); + + vm_size -= AMDGPU_VA_RESERVED_SIZE; + if (vm_size > AMDGPU_VA_HOLE_START) { + dev_info.high_va_offset = AMDGPU_VA_HOLE_END; + dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size; + } dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; @@ -786,9 +788,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file */ void amdgpu_driver_lastclose_kms(struct drm_device *dev) { - struct amdgpu_device *adev = dev->dev_private; - - amdgpu_fbdev_restore_mode(adev); + drm_fb_helper_lastclose(dev); vga_switcheroo_process_delayed_switch(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index ffde1e9666e8..54f06c959340 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -89,7 +89,6 @@ enum amdgpu_hpd_id { AMDGPU_HPD_4, AMDGPU_HPD_5, AMDGPU_HPD_6, - AMDGPU_HPD_LAST, AMDGPU_HPD_NONE = 0xff, }; @@ -106,7 +105,6 @@ enum amdgpu_crtc_irq { AMDGPU_CRTC_IRQ_VLINE4, AMDGPU_CRTC_IRQ_VLINE5, AMDGPU_CRTC_IRQ_VLINE6, - AMDGPU_CRTC_IRQ_LAST, AMDGPU_CRTC_IRQ_NONE = 0xff }; @@ -117,7 +115,6 @@ enum amdgpu_pageflip_irq { AMDGPU_PAGEFLIP_IRQ_D4, AMDGPU_PAGEFLIP_IRQ_D5, AMDGPU_PAGEFLIP_IRQ_D6, - AMDGPU_PAGEFLIP_IRQ_LAST, AMDGPU_PAGEFLIP_IRQ_NONE = 0xff }; @@ -661,10 +658,6 @@ void amdgpu_fbdev_fini(struct amdgpu_device *adev); void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state); int amdgpu_fbdev_total_size(struct amdgpu_device *adev); bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj); -void amdgpu_fbdev_restore_mode(struct amdgpu_device *adev); - -void amdgpu_fb_output_poll_changed(struct amdgpu_device *adev); - int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ea25164e7f4b..5c4c3e0d527b 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -37,6 +37,18 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +static bool amdgpu_need_backup(struct amdgpu_device *adev) +{ + if (adev->flags & AMD_IS_APU) + return false; + + if (amdgpu_gpu_recovery == 0 || + (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev))) + return false; + + return true; +} + static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
@@ -281,6 +293,44 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } +/* Validate that the BO size is smaller than the total memory of the requested domain */ +static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, + unsigned long size, u32 domain) +{ + struct ttm_mem_type_manager *man = NULL; + + /* + * If GTT is part of the requested domains the check must succeed to + * allow falling back to GTT + */ + if (domain & AMDGPU_GEM_DOMAIN_GTT) { + man = &adev->mman.bdev.man[TTM_PL_TT]; + + if (size < (man->size << PAGE_SHIFT)) + return true; + else + goto fail; + } + + if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + man = &adev->mman.bdev.man[TTM_PL_VRAM]; + + if (size < (man->size << PAGE_SHIFT)) + return true; + else + goto fail; + } + + + /* TODO add more domain checks, such as AMDGPU_GEM_DOMAIN_CPU */ + return true; + +fail: + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, + man->size << PAGE_SHIFT); + return false; +} + static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags,
@@ -289,16 +339,24 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, uint64_t init_value, struct amdgpu_bo **bo_ptr) { + struct ttm_operation_ctx ctx = { + .interruptible = !kernel, + .no_wait_gpu = false, + .allow_reserved_eviction = true, + .resv = resv + }; struct amdgpu_bo *bo; enum ttm_bo_type type; unsigned long page_align; - u64 initial_bytes_moved, bytes_moved; size_t acc_size; int r; page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; size = ALIGN(size, PAGE_SIZE); + if (!amdgpu_bo_validate_size(adev, size, domain)) + return -ENOMEM; + if (kernel) { type = ttm_bo_type_kernel; } else if (sg) {
@@ -364,22 +422,19 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo->tbo.bdev = &adev->mman.bdev; amdgpu_ttm_placement_from_domain(bo, domain); - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); - /* Kernel allocation are uninterruptible */ r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, - &bo->placement, page_align, !kernel, NULL, + &bo->placement, page_align, &ctx, NULL, acc_size, sg, resv, &amdgpu_ttm_bo_destroy); if (unlikely(r != 0)) return r; - bytes_moved = atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; if (adev->mc.visible_vram_size < adev->mc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) - amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved); + amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, + ctx.bytes_moved); else - amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); + amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); if (kernel) bo->tbo.priority = 1;
@@ -511,6 +566,7 @@ err: int amdgpu_bo_validate(struct amdgpu_bo *bo) { + struct ttm_operation_ctx ctx = { false, false }; uint32_t domain; int r;
@@ -521,7 +577,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) retry: amdgpu_ttm_placement_from_domain(bo, domain); - r =
ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { domain = bo->allowed_domains; goto retry; @@ -632,6 +688,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { false, false }; int r, i; if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) @@ -647,7 +704,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (bo->pin_count) { uint32_t mem_type = bo->tbo.mem.mem_type; - if (domain != amdgpu_mem_type_to_domain(mem_type)) + if (!(domain & amdgpu_mem_type_to_domain(mem_type))) return -EINVAL; bo->pin_count++; @@ -682,21 +739,23 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } - r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (unlikely(r)) { dev_err(adev->dev, "%p pin failed\n", bo); goto error; } + r = amdgpu_ttm_alloc_gart(&bo->tbo); + if (unlikely(r)) { + dev_err(adev->dev, "%p bind failed\n", bo); + goto error; + } + bo->pin_count = 1; - if (gpu_addr != NULL) { - r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); - if (unlikely(r)) { - dev_err(adev->dev, "%p bind failed\n", bo); - goto error; - } + if (gpu_addr != NULL) *gpu_addr = amdgpu_bo_gpu_offset(bo); - } + + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); if (domain == AMDGPU_GEM_DOMAIN_VRAM) { adev->vram_pin_size += amdgpu_bo_size(bo); if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) @@ -717,6 +776,7 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr) int amdgpu_bo_unpin(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { false, false }; int r, i; if (!bo->pin_count) { @@ -730,7 +790,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) bo->placements[i].lpfn = 0; bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; } - r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (unlikely(r)) { dev_err(adev->dev, "%p validate failed for unpin\n", bo); goto error; @@ -779,8 +839,8 @@ int amdgpu_bo_init(struct amdgpu_device *adev) adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base, adev->mc.aper_size); DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", - adev->mc.mc_vram_size >> 20, - (unsigned long long)adev->mc.aper_size >> 20); + adev->mc.mc_vram_size >> 20, + (unsigned long long)adev->mc.aper_size >> 20); DRM_INFO("RAM width %dbits %s\n", adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]); return amdgpu_ttm_init(adev); @@ -902,6 +962,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo *abo; unsigned long offset, size; int r; @@ -935,7 +996,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) abo->placement.num_busy_placement = 1; abo->placement.busy_placement = &abo->placements[1]; - r = ttm_bo_validate(bo, &abo->placement, false, false); + r = ttm_bo_validate(bo, &abo->placement, &ctx); if (unlikely(r != 0)) return r; @@ -980,7 +1041,7 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); WARN_ON_ONCE(bo->tbo.mem.mem_type == 
TTM_PL_TT && - !amdgpu_ttm_is_bound(bo->tbo.ttm)); + !amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem)); WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && !bo->pin_count); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 428aae048f4b..33615e2ea2e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -187,7 +187,7 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) { switch (bo->tbo.mem.mem_type) { - case TTM_PL_TT: return amdgpu_ttm_is_bound(bo->tbo.ttm); + case TTM_PL_TT: return amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem); case TTM_PL_VRAM: return true; default: return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index f8edf5483f11..01a996c6b802 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -32,7 +32,6 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> -#include "amd_powerplay.h" static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev); @@ -1279,16 +1278,16 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) /* XXX select vce level based on ring/task */ adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; mutex_unlock(&adev->pm.mutex); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); amdgpu_pm_compute_clocks(adev); } else { - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_GATE); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); mutex_lock(&adev->pm.mutex); adev->pm.dpm.vce_active = false; mutex_unlock(&adev->pm.mutex); @@ -1585,7 +1584,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) struct drm_device *ddev = adev->ddev; u32 flags = 0; - amdgpu_get_clockgating_state(adev, &flags); + amdgpu_device_ip_get_clockgating_state(adev, &flags); seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); amdgpu_parse_cg_state(m, flags); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 447d446b5015..2157d4509e84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -264,7 +264,7 @@ static int psp_hw_start(struct psp_context *psp) struct amdgpu_device *adev = psp->adev; int ret; - if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) { + if (!amdgpu_sriov_vf(adev) || !adev->in_gpu_reset) { ret = psp_bootloader_load_sysdrv(psp); if (ret) return ret; @@ -334,23 +334,26 @@ static int psp_load_fw(struct amdgpu_device *adev) int ret; struct psp_context *psp = &adev->psp; + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0) + goto skip_memalloc; + psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) return -ENOMEM; ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - 
&psp->fw_pri_mc_addr, - &psp->fw_pri_buf); + AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); if (ret) goto failed; ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fence_buf_bo, - &psp->fence_buf_mc_addr, - &psp->fence_buf); + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fence_buf_bo, + &psp->fence_buf_mc_addr, + &psp->fence_buf); if (ret) goto failed_mem2; @@ -375,6 +378,7 @@ static int psp_load_fw(struct amdgpu_device *adev) if (ret) goto failed_mem; +skip_memalloc: ret = psp_hw_start(psp); if (ret) goto failed_mem; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 93d86619e802..262c1267249e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -225,7 +225,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, /* Right now all IPs have only one instance - multiple rings. */ if (instance != 0) { - DRM_ERROR("invalid ip instance: %d\n", instance); + DRM_DEBUG("invalid ip instance: %d\n", instance); return -EINVAL; } @@ -255,13 +255,13 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, ip_num_rings = adev->vcn.num_enc_rings; break; default: - DRM_ERROR("unknown ip type: %d\n", hw_ip); + DRM_DEBUG("unknown ip type: %d\n", hw_ip); return -EINVAL; } if (ring >= ip_num_rings) { - DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n", - ring, ip_num_rings, hw_ip); + DRM_DEBUG("Ring index:%d exceeds maximum:%d for ip:%d\n", + ring, ip_num_rings, hw_ip); return -EINVAL; } @@ -292,7 +292,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, default: *out_ring = NULL; r = -EINVAL; - DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); + DRM_DEBUG("unknown HW IP type: %d\n", mapper->hw_ip); } out_unlock: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index a98fbbb4739f..13044e66dcaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -164,7 +164,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * Release a request for executing at @priority */ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { int i; @@ -175,7 +175,7 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, return; /* no need to restore if the job is already at the lowest priority */ - if (priority == AMD_SCHED_PRIORITY_NORMAL) + if (priority == DRM_SCHED_PRIORITY_NORMAL) return; mutex_lock(&ring->priority_mutex); @@ -184,8 +184,8 @@ void amdgpu_ring_priority_put(struct amdgpu_ring *ring, goto out_unlock; /* decay priority to the next level with a job available */ - for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { - if (i == AMD_SCHED_PRIORITY_NORMAL + for (i = priority; i >= DRM_SCHED_PRIORITY_MIN; i--) { + if (i == DRM_SCHED_PRIORITY_NORMAL || atomic_read(&ring->num_jobs[i])) { ring->priority = i; ring->funcs->set_priority(ring, i); @@ -206,7 +206,7 @@ out_unlock: * Request a ring's priority to be raised to @priority (refcounted). 
*/ void amdgpu_ring_priority_get(struct amdgpu_ring *ring, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { if (!ring->funcs->set_priority) return; @@ -263,25 +263,25 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - r = amdgpu_wb_get(adev, &ring->rptr_offs); + r = amdgpu_device_wb_get(adev, &ring->rptr_offs); if (r) { dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &ring->wptr_offs); + r = amdgpu_device_wb_get(adev, &ring->wptr_offs); if (r) { dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &ring->fence_offs); + r = amdgpu_device_wb_get(adev, &ring->fence_offs); if (r) { dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n", r); return r; } - r = amdgpu_wb_get(adev, &ring->cond_exe_offs); + r = amdgpu_device_wb_get(adev, &ring->cond_exe_offs); if (r) { dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); return r; @@ -317,12 +317,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; - ring->priority = AMD_SCHED_PRIORITY_NORMAL; + ring->priority = DRM_SCHED_PRIORITY_NORMAL; mutex_init(&ring->priority_mutex); INIT_LIST_HEAD(&ring->lru_list); amdgpu_ring_lru_touch(adev, ring); - for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i) + for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i) atomic_set(&ring->num_jobs[i], 0); if (amdgpu_debugfs_ring_init(adev, ring)) { @@ -348,11 +348,11 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) if (!(ring->adev) || !(ring->adev->rings[ring->idx])) return; - amdgpu_wb_free(ring->adev, ring->rptr_offs); - amdgpu_wb_free(ring->adev, ring->wptr_offs); + amdgpu_device_wb_free(ring->adev, ring->rptr_offs); + amdgpu_device_wb_free(ring->adev, ring->wptr_offs); - amdgpu_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free(ring->adev, ring->fence_offs); + amdgpu_device_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_device_wb_free(ring->adev, ring->fence_offs); amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b18c2b96691f..102dad3edf6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -25,7 +25,7 @@ #define __AMDGPU_RING_H__ #include <drm/amdgpu_drm.h> -#include "gpu_scheduler.h" +#include <drm/gpu_scheduler.h> /* max number of rings */ #define AMDGPU_MAX_RINGS 18 @@ -79,8 +79,7 @@ struct amdgpu_fence_driver { int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); -void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); -void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring); +void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission); @@ -122,11 +121,11 @@ struct amdgpu_ring_funcs { /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch); + unsigned vmid, bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); - void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id, + void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void (*emit_hdp_flush)(struct 
amdgpu_ring *ring); void (*emit_hdp_invalidate)(struct amdgpu_ring *ring); @@ -155,14 +154,14 @@ struct amdgpu_ring_funcs { void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, - enum amd_sched_priority priority); + enum drm_sched_priority priority); }; struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; - struct amd_gpu_scheduler sched; + struct drm_gpu_scheduler sched; struct list_head lru_list; struct amdgpu_bo *ring_obj; @@ -187,6 +186,7 @@ struct amdgpu_ring { uint64_t eop_gpu_addr; u32 doorbell_index; bool use_doorbell; + bool use_pollmem; unsigned wptr_offs; unsigned fence_offs; uint64_t current_ctx; @@ -197,7 +197,7 @@ struct amdgpu_ring { unsigned vm_inv_eng; bool has_compute_vm_bug; - atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX]; + atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX]; struct mutex priority_mutex; /* protected by priority_mutex */ int priority; @@ -213,9 +213,9 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); void amdgpu_ring_priority_get(struct amdgpu_ring *ring, - enum amd_sched_priority priority); + enum drm_sched_priority priority); void amdgpu_ring_priority_put(struct amdgpu_ring *ring, - enum amd_sched_priority priority); + enum drm_sched_priority priority); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 290cc3f9c433..86a0715d9431 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -29,29 +29,29 @@ #include "amdgpu_vm.h" -enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) { switch (amdgpu_priority) { case AMDGPU_CTX_PRIORITY_VERY_HIGH: - return AMD_SCHED_PRIORITY_HIGH_HW; + return DRM_SCHED_PRIORITY_HIGH_HW; case AMDGPU_CTX_PRIORITY_HIGH: - return AMD_SCHED_PRIORITY_HIGH_SW; + return DRM_SCHED_PRIORITY_HIGH_SW; case AMDGPU_CTX_PRIORITY_NORMAL: - return AMD_SCHED_PRIORITY_NORMAL; + return DRM_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_LOW: case AMDGPU_CTX_PRIORITY_VERY_LOW: - return AMD_SCHED_PRIORITY_LOW; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_UNSET: - return AMD_SCHED_PRIORITY_UNSET; + return DRM_SCHED_PRIORITY_UNSET; default: WARN(1, "Invalid context priority %d\n", amdgpu_priority); - return AMD_SCHED_PRIORITY_INVALID; + return DRM_SCHED_PRIORITY_INVALID; } } static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, int fd, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { struct file *filp = fcheck(fd); struct drm_file *file; @@ -86,11 +86,11 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_sched *args = data; struct amdgpu_device *adev = dev->dev_private; - enum amd_sched_priority priority; + enum drm_sched_priority priority; int r; priority = amdgpu_to_sched_priority(args->in.priority); - if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID) + if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID) return -EINVAL; switch (args->in.op) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h index b28c067d3822..2a1a0c734bdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h @@ -27,7 +27,7 @@ #include <drm/drmP.h> -enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); +enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); int amdgpu_sched_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index a4bf21f8f1c1..df65c66dc956 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -35,6 +35,7 @@ struct amdgpu_sync_entry { struct hlist_node node; struct dma_fence *fence; + bool explicit; }; static struct kmem_cache *amdgpu_sync_slab; @@ -63,7 +64,7 @@ void amdgpu_sync_create(struct amdgpu_sync *sync) static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct dma_fence *f) { - struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (s_fence) { struct amdgpu_ring *ring; @@ -84,7 +85,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, */ static void *amdgpu_sync_get_owner(struct dma_fence *f) { - struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (s_fence) return s_fence->owner; @@ -119,7 +120,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep, * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. */ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, bool explicit) { struct amdgpu_sync_entry *e; @@ -128,6 +129,10 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) continue; amdgpu_sync_keep_later(&e->fence, f); + + /* Preserve explicit flag to not lose pipeline sync */ + e->explicit |= explicit; + return true; } return false; @@ -141,24 +146,25 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) * */ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f) + struct dma_fence *f, bool explicit) { struct amdgpu_sync_entry *e; if (!f) return 0; - if (amdgpu_sync_same_dev(adev, f) && amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM) amdgpu_sync_keep_later(&sync->last_vm_update, f); - if (amdgpu_sync_add_later(sync, f)) + if (amdgpu_sync_add_later(sync, f, explicit)) return 0; e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); if (!e) return -ENOMEM; + e->explicit = explicit; + hash_add(sync->fences, &e->node, f->context); e->fence = dma_fence_get(f); return 0; @@ -189,10 +195,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, /* always sync to the exclusive fence */ f = reservation_object_get_excl(resv); - r = amdgpu_sync_fence(adev, sync, f); - - if (explicit_sync) - return r; + r = amdgpu_sync_fence(adev, sync, f, false); flist = reservation_object_get_list(resv); if (!flist || r) @@ -212,15 +215,15 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, (fence_owner == AMDGPU_FENCE_OWNER_VM))) continue; - /* Ignore fence from the same owner as + /* Ignore fence from the same owner and explicit one as * long as it isn't undefined. 
*/ if (owner != AMDGPU_FENCE_OWNER_UNDEFINED && - fence_owner == owner) + (fence_owner == owner || explicit_sync)) continue; } - r = amdgpu_sync_fence(adev, sync, f); + r = amdgpu_sync_fence(adev, sync, f, false); if (r) break; } @@ -245,7 +248,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, hash_for_each_safe(sync->fences, i, tmp, e, node) { struct dma_fence *f = e->fence; - struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (dma_fence_is_signaled(f)) { hash_del(&e->node); @@ -275,19 +278,21 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, * amdgpu_sync_get_fence - get the next fence from the sync object * * @sync: sync object to use + * @explicit: true if the next fence is explicit * * Get and removes the next fence from the sync object not signaled yet. */ -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; struct dma_fence *f; int i; - hash_for_each_safe(sync->fences, i, tmp, e, node) { f = e->fence; + if (explicit) + *explicit = e->explicit; hash_del(&e->node); kmem_cache_free(amdgpu_sync_slab, e); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 70d7e3a279a0..7aba38d5c9df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -41,7 +41,7 @@ struct amdgpu_sync { void amdgpu_sync_create(struct amdgpu_sync *sync); int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f); + struct dma_fence *f, bool explicit); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, @@ -49,7 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 06525f2c36c3..cace7a93fc94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -82,8 +82,8 @@ TRACE_EVENT(amdgpu_iv, __field(unsigned, client_id) __field(unsigned, src_id) __field(unsigned, ring_id) - __field(unsigned, vm_id) - __field(unsigned, vm_id_src) + __field(unsigned, vmid) + __field(unsigned, vmid_src) __field(uint64_t, timestamp) __field(unsigned, timestamp_src) __field(unsigned, pas_id) @@ -93,8 +93,8 @@ TRACE_EVENT(amdgpu_iv, __entry->client_id = iv->client_id; __entry->src_id = iv->src_id; __entry->ring_id = iv->ring_id; - __entry->vm_id = iv->vm_id; - __entry->vm_id_src = iv->vm_id_src; + __entry->vmid = iv->vmid; + __entry->vmid_src = iv->vmid_src; __entry->timestamp = iv->timestamp; __entry->timestamp_src = iv->timestamp_src; __entry->pas_id = iv->pas_id; @@ -103,9 +103,9 @@ TRACE_EVENT(amdgpu_iv, __entry->src_data[2] = iv->src_data[2]; __entry->src_data[3] = iv->src_data[3]; ), - TP_printk("client_id:%u src_id:%u ring:%u vm_id:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", + TP_printk("client_id:%u src_id:%u ring:%u vmid:%u 
timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", __entry->client_id, __entry->src_id, - __entry->ring_id, __entry->vm_id, + __entry->ring_id, __entry->vmid, __entry->timestamp, __entry->pas_id, __entry->src_data[0], __entry->src_data[1], __entry->src_data[2], __entry->src_data[3]) @@ -219,7 +219,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_STRUCT__entry( __field(struct amdgpu_vm *, vm) __field(u32, ring) - __field(u32, vm_id) + __field(u32, vmid) __field(u32, vm_hub) __field(u64, pd_addr) __field(u32, needs_flush) @@ -228,13 +228,13 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_fast_assign( __entry->vm = vm; __entry->ring = ring->idx; - __entry->vm_id = job->vm_id; + __entry->vmid = job->vmid; __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", - __entry->vm, __entry->ring, __entry->vm_id, + __entry->vm, __entry->ring, __entry->vmid, __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) ); @@ -357,24 +357,24 @@ TRACE_EVENT(amdgpu_vm_copy_ptes, ); TRACE_EVENT(amdgpu_vm_flush, - TP_PROTO(struct amdgpu_ring *ring, unsigned vm_id, + TP_PROTO(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr), - TP_ARGS(ring, vm_id, pd_addr), + TP_ARGS(ring, vmid, pd_addr), TP_STRUCT__entry( __field(u32, ring) - __field(u32, vm_id) + __field(u32, vmid) __field(u32, vm_hub) __field(u64, pd_addr) ), TP_fast_assign( __entry->ring = ring->idx; - __entry->vm_id = vm_id; + __entry->vmid = vmid; __entry->vm_hub = ring->funcs->vmhub; __entry->pd_addr = pd_addr; ), TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", - __entry->ring, __entry->vm_id, + __entry->ring, __entry->vmid, __entry->vm_hub,__entry->pd_addr) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ad5bf86ee8a3..e4bb435e614b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -76,7 +76,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; int r; adev->mman.mem_global_referenced = false; @@ -108,9 +108,9 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); ring = adev->mman.buffer_funcs_ring; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; - r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, + rq, amdgpu_sched_jobs, NULL); if (r) { DRM_ERROR("Failed setting up TTM BO move run queue.\n"); goto error_entity; @@ -131,7 +131,7 @@ error_mem: static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { - amd_sched_entity_fini(adev->mman.entity.sched, + drm_sched_entity_fini(adev->mman.entity.sched, &adev->mman.entity); mutex_destroy(&adev->mman.gtt_window_lock); drm_global_item_unref(&adev->mman.bo_global_ref.ref); @@ -282,8 +282,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, { uint64_t addr = 0; - if (mem->mem_type != TTM_PL_TT || - amdgpu_gtt_mgr_is_allocated(mem)) { + if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) { addr = mm_node->start << PAGE_SHIFT; addr += bo->bdev->man[mem->mem_type].gpu_offset; } @@ -369,7 +368,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device 
*adev, * dst to window 1 */ if (src->mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(src->mem)) { + !amdgpu_gtt_mgr_has_gart_addr(src->mem)) { r = amdgpu_map_buffer(src->bo, src->mem, PFN_UP(cur_size + src_page_offset), src_node_start, 0, ring, @@ -383,7 +382,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } if (dst->mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(dst->mem)) { + !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) { r = amdgpu_map_buffer(dst->bo, dst->mem, PFN_UP(cur_size + dst_page_offset), dst_node_start, 1, ring, @@ -467,9 +466,8 @@ error: return r; } -static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, +static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { struct amdgpu_device *adev; @@ -489,8 +487,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, &placement, &tmp_mem, - interruptible, no_wait_gpu); + r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { return r; } @@ -500,23 +497,22 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, goto out_cleanup; } - r = ttm_tt_bind(bo->ttm, &tmp_mem); + r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx); if (unlikely(r)) { goto out_cleanup; } - r = amdgpu_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem); + r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } - r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, new_mem); + r = ttm_bo_move_ttm(bo, ctx, new_mem); out_cleanup: ttm_bo_mem_put(bo, &tmp_mem); return r; } -static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, +static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, struct ttm_mem_reg *new_mem) { struct amdgpu_device *adev; @@ -536,16 +532,15 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, placements.fpfn = 0; placements.lpfn = 0; placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, &placement, &tmp_mem, - interruptible, no_wait_gpu); + r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { return r; } - r = ttm_bo_move_ttm(bo, interruptible, no_wait_gpu, &tmp_mem); + r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); if (unlikely(r)) { goto out_cleanup; } - r = amdgpu_move_blit(bo, true, no_wait_gpu, new_mem, old_mem); + r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); if (unlikely(r)) { goto out_cleanup; } @@ -554,10 +549,9 @@ out_cleanup: return r; } -static int amdgpu_bo_move(struct ttm_buffer_object *bo, - bool evict, bool interruptible, - bool no_wait_gpu, - struct ttm_mem_reg *new_mem) +static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_mem_reg *new_mem) { struct amdgpu_device *adev; struct amdgpu_bo *abo; @@ -592,19 +586,18 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { - r = amdgpu_move_vram_ram(bo, evict, interruptible, - no_wait_gpu, new_mem); + r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem); } else if (old_mem->mem_type == TTM_PL_SYSTEM && new_mem->mem_type == TTM_PL_VRAM) { - r = amdgpu_move_ram_vram(bo, evict, interruptible, - 
no_wait_gpu, new_mem); + r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem); } else { - r = amdgpu_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem); + r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, + new_mem, old_mem); } if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, interruptible, no_wait_gpu, new_mem); + r = ttm_bo_move_memcpy(bo, ctx, new_mem); if (r) { return r; } @@ -690,7 +683,6 @@ struct amdgpu_ttm_tt { struct list_head guptasks; atomic_t mmu_invalidations; uint32_t last_set_pages; - struct list_head list; }; int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) @@ -861,44 +853,35 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; - if (!amdgpu_gtt_mgr_is_allocated(bo_mem)) + if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { + gtt->offset = AMDGPU_BO_INVALID_OFFSET; return 0; + } - spin_lock(&gtt->adev->gtt_list_lock); flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); - if (r) { + if (r) DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", ttm->num_pages, gtt->offset); - goto error_gart_bind; - } - - list_add_tail(&gtt->list, &gtt->adev->gtt_list); -error_gart_bind: - spin_unlock(&gtt->adev->gtt_list_lock); return r; } -bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - - return gtt && !list_empty(&gtt->list); -} - -int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) +int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); - struct ttm_tt *ttm = bo->ttm; + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_ttm_tt *gtt = (void*)bo->ttm; struct ttm_mem_reg tmp; struct ttm_placement placement; struct ttm_place placements; + uint64_t flags; int r; - if (!ttm || amdgpu_ttm_is_bound(ttm)) + if (bo->mem.mem_type != TTM_PL_TT || + amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) return 0; tmp = bo->mem; @@ -912,43 +895,44 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | TTM_PL_FLAG_TT; - r = ttm_bo_mem_space(bo, &placement, &tmp, true, false); + r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); if (unlikely(r)) return r; - r = ttm_bo_move_ttm(bo, true, false, &tmp); - if (unlikely(r)) + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); + gtt->offset = (u64)tmp.start << PAGE_SHIFT; + r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages, + bo->ttm->pages, gtt->ttm.dma_address, flags); + if (unlikely(r)) { ttm_bo_mem_put(bo, &tmp); - else - bo->offset = (bo->mem.start << PAGE_SHIFT) + - bo->bdev->man[bo->mem.mem_type].gpu_offset; + return r; + } - return r; + ttm_bo_mem_put(bo, &bo->mem); + bo->mem = tmp; + bo->offset = (bo->mem.start << PAGE_SHIFT) + + bo->bdev->man[bo->mem.mem_type].gpu_offset; + + return 0; } -int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) +int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) { - struct amdgpu_ttm_tt *gtt, *tmp; - struct ttm_mem_reg bo_mem; + struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); + struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm; uint64_t flags; int r; - bo_mem.mem_type = TTM_PL_TT; - spin_lock(&adev->gtt_list_lock); - list_for_each_entry_safe(gtt, tmp, &adev->gtt_list, list) { - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, &gtt->ttm.ttm, &bo_mem); - r = amdgpu_gart_bind(adev, 
gtt->offset, gtt->ttm.ttm.num_pages, - gtt->ttm.ttm.pages, gtt->ttm.dma_address, - flags); - if (r) { - spin_unlock(&adev->gtt_list_lock); - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", - gtt->ttm.ttm.num_pages, gtt->offset); - return r; - } - } - spin_unlock(&adev->gtt_list_lock); - return 0; + if (!gtt) + return 0; + + flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem); + r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages, + gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags); + if (r) + DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + gtt->ttm.ttm.num_pages, gtt->offset); + return r; } static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) @@ -959,20 +943,14 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); - if (!amdgpu_ttm_is_bound(ttm)) + if (gtt->offset == AMDGPU_BO_INVALID_OFFSET) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - spin_lock(&gtt->adev->gtt_list_lock); r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); - if (r) { + if (r) DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", gtt->ttm.ttm.num_pages, gtt->offset); - goto error_unbind; - } - list_del_init(&gtt->list); -error_unbind: - spin_unlock(&gtt->adev->gtt_list_lock); return r; } @@ -1009,11 +987,11 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, kfree(gtt); return NULL; } - INIT_LIST_HEAD(&gtt->list); return &gtt->ttm.ttm; } -static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) +static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, + struct ttm_operation_ctx *ctx) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1041,11 +1019,11 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { - return ttm_dma_populate(&gtt->ttm, adev->dev); + return ttm_dma_populate(&gtt->ttm, adev->dev, ctx); } #endif - return ttm_populate_and_map_pages(adev->dev, &gtt->ttm); + return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx); } static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm) @@ -1292,6 +1270,101 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .access_memory = &amdgpu_ttm_access_memory }; +/* + * Firmware Reservation functions + */ +/** + * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram + * + * @adev: amdgpu_device pointer + * + * free fw reserved vram if it has been reserved. + */ +static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, + NULL, &adev->fw_vram_usage.va); +} + +/** + * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from fw. 
+ */ +static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) +{ + struct ttm_operation_ctx ctx = { false, false }; + int r = 0; + int i; + u64 vram_size = adev->mc.visible_vram_size; + u64 offset = adev->fw_vram_usage.start_offset; + u64 size = adev->fw_vram_usage.size; + struct amdgpu_bo *bo; + + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + + if (adev->fw_vram_usage.size > 0 && + adev->fw_vram_usage.size <= vram_size) { + + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, + PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, + &adev->fw_vram_usage.reserved_bo); + if (r) + goto error_create; + + r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); + if (r) + goto error_reserve; + + /* remove the original mem node and create a new one at the + * request position + */ + bo = adev->fw_vram_usage.reserved_bo; + offset = ALIGN(offset, PAGE_SIZE); + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = offset >> PAGE_SHIFT; + bo->placements[i].lpfn = (offset + size) >> PAGE_SHIFT; + } + + ttm_bo_mem_put(&bo->tbo, &bo->tbo.mem); + r = ttm_bo_mem_space(&bo->tbo, &bo->placement, + &bo->tbo.mem, &ctx); + if (r) + goto error_pin; + + r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, + AMDGPU_GEM_DOMAIN_VRAM, + adev->fw_vram_usage.start_offset, + (adev->fw_vram_usage.start_offset + + adev->fw_vram_usage.size), NULL); + if (r) + goto error_pin; + r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); + if (r) + goto error_kmap; + + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + } + return r; + +error_kmap: + amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); +error_pin: + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +error_reserve: + amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); +error_create: + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + return r; +} + int amdgpu_ttm_init(struct amdgpu_device *adev) { uint64_t gtt_size; @@ -1334,7 +1407,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) *The reserved vram for firmware must be pinned to the specified *place on the VRAM, so reserve it early. 
*/ - r = amdgpu_fw_reserve_vram_init(adev); + r = amdgpu_ttm_fw_reserve_vram_init(adev); if (r) { return r; } @@ -1348,9 +1421,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->mc.real_vram_size / (1024 * 1024))); - if (amdgpu_gtt_size == -1) - gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size); + if (amdgpu_gtt_size == -1) { + struct sysinfo si; + + si_meminfo(&si); + gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size), + ((uint64_t)si.totalram * si.mem_unit * 3/4)); + } else gtt_size = (uint64_t)amdgpu_gtt_size << 20; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); @@ -1410,19 +1488,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) void amdgpu_ttm_fini(struct amdgpu_device *adev) { - int r; - if (!adev->mman.initialized) return; + amdgpu_ttm_debugfs_fini(adev); - if (adev->stolen_vga_memory) { - r = amdgpu_bo_reserve(adev->stolen_vga_memory, true); - if (r == 0) { - amdgpu_bo_unpin(adev->stolen_vga_memory); - amdgpu_bo_unreserve(adev->stolen_vga_memory); - } - amdgpu_bo_unref(&adev->stolen_vga_memory); - } + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_ttm_fw_reserve_vram_fini(adev); + ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); if (adev->gds.mem.total_size) @@ -1432,7 +1504,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (adev->gds.oa.total_size) ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); ttm_bo_device_release(&adev->mman.bdev); - amdgpu_gart_fini(adev); amdgpu_ttm_global_fini(adev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); @@ -1628,7 +1699,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, } if (bo->tbo.mem.mem_type == TTM_PL_TT) { - r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + r = amdgpu_ttm_alloc_gart(&bo->tbo); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index abd4084982a3..167856f6080f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -25,7 +25,7 @@ #define __AMDGPU_TTM_H__ #include "amdgpu.h" -#include "gpu_scheduler.h" +#include <drm/gpu_scheduler.h> #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) @@ -55,7 +55,7 @@ struct amdgpu_mman { struct mutex gtt_window_lock; /* Scheduler entity for buffer moves */ - struct amd_sched_entity entity; + struct drm_sched_entity entity; }; struct amdgpu_copy_mem { @@ -67,8 +67,9 @@ struct amdgpu_copy_mem { extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; -bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem); +bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); +int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); @@ -90,9 +91,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct dma_fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); -bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); -int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); -int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); +int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); +int 
amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 65649026b836..474f88fbafce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -359,7 +359,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, int amdgpu_ucode_init_bo(struct amdgpu_device *adev) { - struct amdgpu_bo **bo = &adev->firmware.fw_buf; uint64_t fw_offset = 0; int i, err; struct amdgpu_firmware_info *ucode = NULL; @@ -370,36 +369,16 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) return 0; } - if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) { - err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, + if (!adev->in_gpu_reset) { + err = amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, bo); + &adev->firmware.fw_buf, + &adev->firmware.fw_buf_mc, + &adev->firmware.fw_buf_ptr); if (err) { - dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); + dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n"); goto failed; } - - err = amdgpu_bo_reserve(*bo, false); - if (err) { - dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err); - goto failed_reserve; - } - - err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, - &adev->firmware.fw_buf_mc); - if (err) { - dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err); - goto failed_pin; - } - - err = amdgpu_bo_kmap(*bo, &adev->firmware.fw_buf_ptr); - if (err) { - dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err); - goto failed_kmap; - } - - amdgpu_bo_unreserve(*bo); } memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size); @@ -436,12 +415,6 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) } return 0; -failed_kmap: - amdgpu_bo_unpin(*bo); -failed_pin: - amdgpu_bo_unreserve(*bo); -failed_reserve: - amdgpu_bo_unref(bo); failed: if (err) adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; @@ -464,8 +437,10 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev) ucode->kaddr = NULL; } } - amdgpu_bo_unref(&adev->firmware.fw_buf); - adev->firmware.fw_buf = NULL; + + amdgpu_bo_free_kernel(&adev->firmware.fw_buf, + &adev->firmware.fw_buf_mc, + &adev->firmware.fw_buf_ptr); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e8bd50cf9785..b2eae86bf906 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -116,7 +116,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work); int amdgpu_uvd_sw_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; @@ -230,9 +230,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) } ring = &adev->uvd.ring; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity, + rq, amdgpu_sched_jobs, NULL); if 
(r != 0) { DRM_ERROR("Failed setting up UVD run queue.\n"); return r; @@ -244,7 +244,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) } /* from uvd v5.0 HW addressing capacity increased to 64 bits */ - if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) + if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) adev->uvd.address_64_bit = true; switch (adev->asic_type) { @@ -272,7 +272,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) int i; kfree(adev->uvd.saved_bo); - amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); + drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo, &adev->uvd.gpu_addr, @@ -297,6 +297,8 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (adev->uvd.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->uvd.idle_work); + for (i = 0; i < adev->uvd.max_handles; ++i) if (atomic_read(&adev->uvd.handles[i])) break; @@ -304,8 +306,6 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (i == AMDGPU_MAX_UVD_HANDLES) return 0; - cancel_delayed_work_sync(&adev->uvd.idle_work); - size = amdgpu_bo_size(adev->uvd.vcpu_bo); ptr = adev->uvd.cpu_addr; @@ -346,6 +346,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) ptr += le32_to_cpu(hdr->ucode_size_bytes); } memset_io(ptr, 0, size); + /* to restore uvd fence seq */ + amdgpu_fence_driver_force_completion(&adev->uvd.ring); } return 0; @@ -408,6 +410,7 @@ static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx) */ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) { + struct ttm_operation_ctx tctx = { false, false }; struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; uint32_t cmd; @@ -430,7 +433,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) } amdgpu_uvd_force_into_uvd_segment(bo); - r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &tctx); } return r; @@ -949,6 +952,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, bool direct, struct dma_fence **fence) { + struct ttm_operation_ctx ctx = { true, false }; struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head head; @@ -975,7 +979,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, amdgpu_uvd_force_into_uvd_segment(bo); } - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) goto err; @@ -1151,10 +1155,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) } else { amdgpu_asic_set_uvd_clocks(adev, 0, 0); /* shutdown the UVD block */ - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_GATE); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_GATE); } } else { schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT); @@ -1174,10 +1178,10 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) amdgpu_dpm_enable_uvd(adev, true); } else { amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + 
amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_UNGATE); } } } @@ -1218,7 +1222,7 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 3553b92bf69a..32ea20b99e53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -31,6 +31,10 @@ #define AMDGPU_UVD_SESSION_SIZE (50*1024) #define AMDGPU_UVD_FIRMWARE_OFFSET 256 +#define AMDGPU_UVD_FIRMWARE_SIZE(adev) \ + (AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \ + 8) - AMDGPU_UVD_FIRMWARE_OFFSET) + struct amdgpu_uvd { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -47,8 +51,8 @@ struct amdgpu_uvd { struct amdgpu_irq_src irq; bool address_64_bit; bool use_ctx_buf; - struct amd_sched_entity entity; - struct amd_sched_entity entity_enc; + struct drm_sched_entity entity; + struct drm_sched_entity entity_enc; uint32_t srbm_soft_reset; unsigned num_enc_rings; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 2918de2f39ec..d274ae535530 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -85,7 +85,7 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work); int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) { struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; const char *fw_name; const struct common_firmware_header *hdr; unsigned ucode_version, version_major, version_minor, binary_id; @@ -174,9 +174,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) } ring = &adev->vce.ring[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, + rq, amdgpu_sched_jobs, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCE run queue.\n"); return r; @@ -207,7 +207,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; - amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); + drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, (void **)&adev->vce.cpu_addr); @@ -311,10 +311,10 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) amdgpu_dpm_enable_vce(adev, false); } else { amdgpu_asic_set_vce_clocks(adev, 0, 0); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_GATE); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_GATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_GATE); } } else { schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT); @@ -343,10 +343,10 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) amdgpu_dpm_enable_vce(adev, true); } else { 
amdgpu_asic_set_vce_clocks(adev, 53300, 40000); - amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_CG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); } } @@ -544,6 +544,55 @@ err: } /** + * amdgpu_vce_cs_validate_bo - make sure not to cross 4GB boundary + * + * @p: parser context + * @lo: address of lower dword + * @hi: address of higher dword + * @size: minimum size + * @index: bs/fb index + * + * Make sure that no BO cross a 4GB boundary. + */ +static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, + int lo, int hi, unsigned size, int32_t index) +{ + int64_t offset = ((uint64_t)size) * ((int64_t)index); + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *mapping; + unsigned i, fpfn, lpfn; + struct amdgpu_bo *bo; + uint64_t addr; + int r; + + addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | + ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + if (index >= 0) { + addr += offset; + fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT; + lpfn = 0x100000000ULL >> PAGE_SHIFT; + } else { + fpfn = 0; + lpfn = (0x100000000ULL - PAGE_ALIGN(offset)) >> PAGE_SHIFT; + } + + r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", + addr, lo, hi, size, index); + return r; + } + + for (i = 0; i < bo->placement.num_placement; ++i) { + bo->placements[i].fpfn = max(bo->placements[i].fpfn, fpfn); + bo->placements[i].lpfn = bo->placements[i].lpfn ? + min(bo->placements[i].lpfn, lpfn) : lpfn; + } + return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +} + + +/** * amdgpu_vce_cs_reloc - command submission relocation * * @p: parser context @@ -648,12 +697,13 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) uint32_t allocated = 0; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - int i, r = 0, idx = 0; + unsigned idx; + int i, r = 0; p->job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); - while (idx < ib->length_dw) { + for (idx = 0; idx < ib->length_dw;) { uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); @@ -664,6 +714,54 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) } switch (cmd) { + case 0x00000002: /* task info */ + fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); + bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + break; + + case 0x03000001: /* encode */ + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10, + idx + 9, 0, 0); + if (r) + goto out; + + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12, + idx + 11, 0, 0); + if (r) + goto out; + break; + + case 0x05000001: /* context buffer */ + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, + idx + 2, 0, 0); + if (r) + goto out; + break; + + case 0x05000004: /* video bitstream buffer */ + tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2, + tmp, bs_idx); + if (r) + goto out; + break; + + case 0x05000005: /* feedback buffer */ + r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2, + 4096, fb_idx); + if (r) + goto out; + break; + } + + idx += len / 4; + } + + for (idx = 0; idx < ib->length_dw;) { + uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); + uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + 
+ switch (cmd) { case 0x00000001: /* session */ handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); session_idx = amdgpu_vce_validate_handle(p, handle, @@ -893,7 +991,7 @@ out: * */ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -954,7 +1052,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) } if (i < timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed\n", @@ -999,7 +1097,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } error: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 5ce54cde472d..0fd378ae92c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -46,7 +46,7 @@ struct amdgpu_vce { struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; struct amdgpu_irq_src irq; unsigned harvest_config; - struct amd_sched_entity entity; + struct drm_sched_entity entity; uint32_t srbm_soft_reset; unsigned num_rings; }; @@ -63,7 +63,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch); + unsigned vmid, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 041e0121590c..837962118dbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -35,8 +35,7 @@ #include "soc15d.h" #include "soc15_common.h" -#include "vega10/soc15ip.h" -#include "raven1/VCN/vcn_1_0_offset.h" +#include "vcn/vcn_1_0_offset.h" /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) @@ -51,7 +50,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_sw_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; @@ -104,18 +103,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } ring = &adev->vcn.ring_dec; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, + rq, amdgpu_sched_jobs, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN dec run queue.\n"); return r; } ring = &adev->vcn.ring_enc[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = 
drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, + rq, amdgpu_sched_jobs, NULL); if (r != 0) { DRM_ERROR("Failed setting up VCN enc run queue.\n"); return r; @@ -130,9 +129,9 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) kfree(adev->vcn.saved_bo); - amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); + drm_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); - amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); + drm_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, &adev->vcn.gpu_addr, @@ -261,7 +260,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", @@ -274,6 +273,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, bool direct, struct dma_fence **fence) { + struct ttm_operation_ctx ctx = { true, false }; struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head head; @@ -294,7 +294,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *b if (r) return r; - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) goto err; @@ -467,7 +467,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } @@ -500,7 +500,7 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed\n", @@ -643,7 +643,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } error: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index d50ba0657854..2fd7db891689 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -56,8 +56,8 @@ struct amdgpu_vcn { struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_irq_src irq; - struct amd_sched_entity entity_dec; - struct amd_sched_entity entity_enc; + struct drm_sched_entity entity_dec; + struct drm_sched_entity entity_enc; unsigned num_enc_rings; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 6738df836a70..e7dfb7b44b4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -24,6 +24,14 @@ #include "amdgpu.h" #define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ +bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) +{ + /* By now all MMIO pages except mailbox are blocked */ + /* if blocking is enabled in hypervisor. Choose the */ + /* SCRATCH_REG0 to test. 
*/ + return RREG32_NO_KIQ(0xc040) == 0xffffffff; +} + int amdgpu_allocate_static_csa(struct amdgpu_device *adev) { int r; @@ -39,6 +47,12 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) return 0; } +void amdgpu_free_static_csa(struct amdgpu_device *adev) { + amdgpu_bo_free_kernel(&adev->virt.csa_obj, + &adev->virt.csa_vmid0_addr, + NULL); +} + /* * amdgpu_map_static_csa should be called during amdgpu_vm_init * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE" @@ -107,8 +121,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->enable_virtual_display = true; adev->cg_flags = 0; adev->pg_flags = 0; - - mutex_init(&adev->virt.lock_reset); } uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) @@ -228,6 +240,22 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev) } /** + * amdgpu_virt_wait_reset() - wait for reset gpu completed + * @amdgpu: amdgpu device. + * Wait for GPU reset completed. + * Return: Zero if reset success, otherwise will return error. + */ +int amdgpu_virt_wait_reset(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + + if (!virt->ops || !virt->ops->wait_reset) + return -EINVAL; + + return virt->ops->wait_reset(adev); +} + +/** * amdgpu_virt_alloc_mm_table() - alloc memory for mm table * @amdgpu: amdgpu device. * MM table is used by UVD and VCE for its initialization @@ -296,7 +324,6 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) { - uint32_t pf2vf_ver = 0; uint32_t pf2vf_size = 0; uint32_t checksum = 0; uint32_t checkval; @@ -309,9 +336,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf = (struct amdgim_pf2vf_info_header *)( adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET); - pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version; AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); + AMDGPU_FW_VRAM_PF2VF_READ(adev, feature_flags, &adev->virt.gim_feature); /* pf2vf message must be in 4K */ if (pf2vf_size > 0 && pf2vf_size < 4096) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index b89d37fc406f..6a83425aa9ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -55,6 +55,7 @@ struct amdgpu_virt_ops { int (*req_full_gpu)(struct amdgpu_device *adev, bool init); int (*rel_full_gpu)(struct amdgpu_device *adev, bool init); int (*reset_gpu)(struct amdgpu_device *adev); + int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); }; @@ -80,6 +81,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1, /* GIM supports feature of loading uCodes */ AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, + /* VRAM LOST by GIM */ + AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, }; struct amdgim_pf2vf_info_header { @@ -238,7 +241,6 @@ struct amdgpu_virt { uint64_t csa_vmid0_addr; bool chained_ib_support; uint32_t reg_val_offs; - struct mutex lock_reset; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; struct work_struct flr_work; @@ -246,6 +248,7 @@ struct amdgpu_virt { const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; struct amdgpu_virt_fw_reserve fw_reserve; + uint32_t gim_feature; }; #define AMDGPU_CSA_SIZE (8 * 1024) @@ -276,16 +279,18 @@ static inline bool is_virtual_machine(void) } struct amdgpu_vm; +bool 
amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); int amdgpu_allocate_static_csa(struct amdgpu_device *adev); int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va); +void amdgpu_free_static_csa(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); +int amdgpu_virt_wait_reset(struct amdgpu_device *adev); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c8c26f21993c..6fc16eecf2dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -34,52 +34,6 @@ #include "amdgpu_trace.h" /* - * PASID manager - * - * PASIDs are global address space identifiers that can be shared - * between the GPU, an IOMMU and the driver. VMs on different devices - * may use the same PASID if they share the same address - * space. Therefore PASIDs are allocated using a global IDA. VMs are - * looked up from the PASID per amdgpu_device. - */ -static DEFINE_IDA(amdgpu_vm_pasid_ida); - -/** - * amdgpu_vm_alloc_pasid - Allocate a PASID - * @bits: Maximum width of the PASID in bits, must be at least 1 - * - * Allocates a PASID of the given width while keeping smaller PASIDs - * available if possible. - * - * Returns a positive integer on success. Returns %-EINVAL if bits==0. - * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on - * memory allocation failure. - */ -int amdgpu_vm_alloc_pasid(unsigned int bits) -{ - int pasid = -EINVAL; - - for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_simple_get(&amdgpu_vm_pasid_ida, - 1U << (bits - 1), 1U << bits, - GFP_KERNEL); - if (pasid != -ENOSPC) - break; - } - - return pasid; -} - -/** - * amdgpu_vm_free_pasid - Free a PASID - * @pasid: PASID to free - */ -void amdgpu_vm_free_pasid(unsigned int pasid) -{ - ida_simple_remove(&amdgpu_vm_pasid_ida, pasid); -} - -/* * GPUVM * GPUVM is similar to the legacy gart on older asics, however * rather than there being a single global gart table @@ -139,6 +93,35 @@ struct amdgpu_prt_cb { }; /** + * amdgpu_vm_level_shift - return the addr shift for each level + * + * @adev: amdgpu_device pointer + * + * Returns the number of bits the pfn needs to be right shifted for a level. 
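 *
 * As a worked example, assuming the 9-bit block size that
 * amdgpu_vm_adjust_size() below selects for multi-level layouts:
 * PTB -> 0, PDB0 -> 9, PDB1 -> 18, PDB2 -> 27, i.e. each directory
 * level covers 512 entries of the level beneath it.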
+ */ +static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, + unsigned level) +{ + unsigned shift = 0xff; + + switch (level) { + case AMDGPU_VM_PDB2: + case AMDGPU_VM_PDB1: + case AMDGPU_VM_PDB0: + shift = 9 * (AMDGPU_VM_PDB0 - level) + + adev->vm_manager.block_size; + break; + case AMDGPU_VM_PTB: + shift = 0; + break; + default: + dev_err(adev->dev, "the level%d isn't supported.\n", level); + } + + return shift; +} + +/** * amdgpu_vm_num_entries - return the number of entries in a PD/PT * * @adev: amdgpu_device pointer @@ -148,17 +131,18 @@ struct amdgpu_prt_cb { static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, unsigned level) { - if (level == 0) + unsigned shift = amdgpu_vm_level_shift(adev, + adev->vm_manager.root_level); + + if (level == adev->vm_manager.root_level) /* For the root directory */ - return adev->vm_manager.max_pfn >> - (adev->vm_manager.block_size * - adev->vm_manager.num_level); - else if (level == adev->vm_manager.num_level) + return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift; + else if (level != AMDGPU_VM_PTB) + /* Everything in between */ + return 512; + else /* For the page tables on the leaves */ return AMDGPU_VM_PTE_COUNT(adev); - else - /* Everything in between */ - return 1 << adev->vm_manager.block_size; } /** @@ -288,8 +272,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, uint64_t saddr, uint64_t eaddr, unsigned level) { - unsigned shift = (adev->vm_manager.num_level - level) * - adev->vm_manager.block_size; + unsigned shift = amdgpu_vm_level_shift(adev, level); unsigned pt_idx, from, to; int r; u64 flags; @@ -312,9 +295,6 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, to >= amdgpu_vm_num_entries(adev, level)) return -EINVAL; - if (to > parent->last_entry_used) - parent->last_entry_used = to; - ++level; saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); @@ -329,7 +309,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (vm->pte_support_ats) { init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != adev->vm_manager.num_level - 1) + if (level != AMDGPU_VM_PTB) init_value |= AMDGPU_PDE_PTE; } @@ -369,10 +349,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_add(&entry->base.vm_status, &vm->relocated); spin_unlock(&vm->status_lock); - entry->addr = 0; } - if (level < adev->vm_manager.num_level) { + if (level < AMDGPU_VM_PTB) { uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); @@ -418,287 +397,8 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); -} - -/** - * amdgpu_vm_had_gpu_reset - check if reset occured since last use - * - * @adev: amdgpu_device pointer - * @id: VMID structure - * - * Check if GPU reset occured since last use of the VMID. 
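 * (The predicate itself only moves in this series: callers below now
 * use amdgpu_vmid_had_gpu_reset() from the split-out VMID code, which
 * presumably keeps the same reset-counter comparison.)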
- */ -static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, - struct amdgpu_vm_id *id) -{ - return id->current_gpu_reset_count != - atomic_read(&adev->gpu_reset_counter); -} - -static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) -{ - return !!vm->reserved_vmid[vmhub]; -} - -/* idr_mgr->lock must be held */ -static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, - struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; - uint64_t fence_context = adev->fence_context + ring->idx; - struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct dma_fence *updates = sync->last_vm_update; - int r = 0; - struct dma_fence *flushed, *tmp; - bool needs_flush = vm->use_cpu_for_update; - - flushed = id->flushed_updates; - if ((amdgpu_vm_had_gpu_reset(adev, id)) || - (atomic64_read(&id->owner) != vm->client_id) || - (job->vm_pd_addr != id->pd_gpu_addr) || - (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) || - (!id->last_flush || (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush)))) { - needs_flush = true; - /* to prevent one context starved by another context */ - id->pd_gpu_addr = 0; - tmp = amdgpu_sync_peek_fence(&id->active, ring); - if (tmp) { - r = amdgpu_sync_fence(adev, sync, tmp); - return r; - } - } - - /* Good we can use this VMID. Remember this submission as - * user of the VMID. - */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence); - if (r) - goto out; - - if (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - } - id->pd_gpu_addr = job->vm_pd_addr; - atomic64_set(&id->owner, vm->client_id); - job->vm_needs_flush = needs_flush; - if (needs_flush) { - dma_fence_put(id->last_flush); - id->last_flush = NULL; - } - job->vm_id = id - id_mgr->ids; - trace_amdgpu_vm_grab_id(vm, ring, job); -out: - return r; -} - -/** - * amdgpu_vm_grab_id - allocate the next free VMID - * - * @vm: vm to allocate id for - * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse - * - * Allocate an id for the vm, adding fences to the sync obj as necessary. 
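 * (This allocator is likewise relocated rather than lost; VMID handling
 * moves behind the amdgpu_vmid_* entry points of the new amdgpu_ids.h
 * header included further down, e.g. amdgpu_vmid_alloc_reserved() and
 * amdgpu_vmid_free_reserved().)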
- */ -int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; - struct amdgpu_vm_id *id, *idle; - struct dma_fence **fences; - unsigned i; - int r = 0; - - mutex_lock(&id_mgr->lock); - if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { - r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); - mutex_unlock(&id_mgr->lock); - return r; - } - fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) { - mutex_unlock(&id_mgr->lock); - return -ENOMEM; - } - /* Check if we have an idle VMID */ - i = 0; - list_for_each_entry(idle, &id_mgr->ids_lru, list) { - fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); - if (!fences[i]) - break; - ++i; - } - - /* If we can't find a idle VMID to use, wait till one becomes available */ - if (&idle->list == &id_mgr->ids_lru) { - u64 fence_context = adev->vm_manager.fence_context + ring->idx; - unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; - struct dma_fence_array *array; - unsigned j; - - for (j = 0; j < i; ++j) - dma_fence_get(fences[j]); - - array = dma_fence_array_create(i, fences, fence_context, - seqno, true); - if (!array) { - for (j = 0; j < i; ++j) - dma_fence_put(fences[j]); - kfree(fences); - r = -ENOMEM; - goto error; - } - - - r = amdgpu_sync_fence(ring->adev, sync, &array->base); - dma_fence_put(&array->base); - if (r) - goto error; - - mutex_unlock(&id_mgr->lock); - return 0; - - } - kfree(fences); - - job->vm_needs_flush = vm->use_cpu_for_update; - /* Check if we can use a VMID already assigned to this VM */ - list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { - struct dma_fence *flushed; - bool needs_flush = vm->use_cpu_for_update; - - /* Check all the prerequisites to using this VMID */ - if (amdgpu_vm_had_gpu_reset(adev, id)) - continue; - - if (atomic64_read(&id->owner) != vm->client_id) - continue; - - if (job->vm_pd_addr != id->pd_gpu_addr) - continue; - - if (!id->last_flush || - (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush))) - needs_flush = true; - - flushed = id->flushed_updates; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) - needs_flush = true; - - /* Concurrent flushes are only possible starting with Vega10 */ - if (adev->asic_type < CHIP_VEGA10 && needs_flush) - continue; - - /* Good we can use this VMID. Remember this submission as - * user of the VMID. - */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence); - if (r) - goto error; - - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - } - - if (needs_flush) - goto needs_flush; - else - goto no_flush_needed; - - }; - - /* Still no ID to use? 
Then use the idle one found earlier */ - id = idle; - - /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence); - if (r) - goto error; - - id->pd_gpu_addr = job->vm_pd_addr; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - atomic64_set(&id->owner, vm->client_id); - -needs_flush: - job->vm_needs_flush = true; - dma_fence_put(id->last_flush); - id->last_flush = NULL; - -no_flush_needed: - list_move_tail(&id->list, &id_mgr->ids_lru); - - job->vm_id = id - id_mgr->ids; - trace_amdgpu_vm_grab_id(vm, ring, job); - -error: - mutex_unlock(&id_mgr->lock); - return r; -} - -static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub) -{ - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - - mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) { - list_add(&vm->reserved_vmid[vmhub]->list, - &id_mgr->ids_lru); - vm->reserved_vmid[vmhub] = NULL; - atomic_dec(&id_mgr->reserved_vmid_num); - } - mutex_unlock(&id_mgr->lock); -} - -static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - unsigned vmhub) -{ - struct amdgpu_vm_id_manager *id_mgr; - struct amdgpu_vm_id *idle; - int r = 0; - - id_mgr = &adev->vm_manager.id_mgr[vmhub]; - mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) - goto unlock; - if (atomic_inc_return(&id_mgr->reserved_vmid_num) > - AMDGPU_VM_MAX_RESERVED_VMID) { - DRM_ERROR("Over limitation of reserved vmid\n"); - atomic_dec(&id_mgr->reserved_vmid_num); - r = -EINVAL; - goto unlock; - } - /* Select the first entry VMID */ - idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); - list_del_init(&idle->list); - vm->reserved_vmid[vmhub] = idle; - mutex_unlock(&id_mgr->lock); - - return 0; -unlock: - mutex_unlock(&id_mgr->lock); - return r; + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, + adev->vm_manager.root_level); } /** @@ -715,7 +415,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) has_compute_vm_bug = false; - ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); if (ip_block) { /* Compute has a VM bug for GFX version < 7. 
Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ @@ -741,14 +441,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct amdgpu_vm_id *id; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id; bool gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; - if (job->vm_id == 0) + if (job->vmid == 0) return false; - id = &id_mgr->ids[job->vm_id]; + id = &id_mgr->ids[job->vmid]; gds_switch_needed = ring->funcs->emit_gds_switch && ( id->gds_base != job->gds_base || id->gds_size != job->gds_size || @@ -757,7 +457,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, id->oa_base != job->oa_base || id->oa_size != job->oa_size); - if (amdgpu_vm_had_gpu_reset(adev, id)) + if (amdgpu_vmid_had_gpu_reset(adev, id)) return true; return vm_flush_needed || gds_switch_needed; @@ -772,7 +472,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush - * @vm_id: vmid number to use + * @vmid: vmid number to use * @pd_addr: address of the page directory * * Emit a VM flush when it is necessary. @@ -781,8 +481,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct amdgpu_vm_id *id = &id_mgr->ids[job->vm_id]; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; bool gds_switch_needed = ring->funcs->emit_gds_switch && ( id->gds_base != job->gds_base || id->gds_size != job->gds_size || @@ -794,7 +494,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ unsigned patch_offset = 0; int r; - if (amdgpu_vm_had_gpu_reset(adev, id)) { + if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; vm_flush_needed = true; } @@ -811,8 +511,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ if (ring->funcs->emit_vm_flush && vm_flush_needed) { struct dma_fence *fence; - trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); + trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -832,7 +532,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->gws_size = job->gws_size; id->oa_base = job->oa_base; id->oa_size = job->oa_size; - amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base, + amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, job->gds_size, job->gws_base, job->gws_size, job->oa_base, job->oa_size); @@ -850,49 +550,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ } /** - * amdgpu_vm_reset_id - reset VMID to zero - * - * @adev: amdgpu device structure - * @vm_id: vmid number to use - * - * Reset saved GDW, GWS and OA to force switch on next flush. 
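 * (The per-ID reset survives too: the open-coded loop in
 * amdgpu_vm_manager_init() below is replaced by amdgpu_vmid_mgr_init(),
 * which presumably performs the same GDS/GWS/OA clearing for each VMID.)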
- */ -void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, - unsigned vmid) -{ - struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct amdgpu_vm_id *id = &id_mgr->ids[vmid]; - - atomic64_set(&id->owner, 0); - id->gds_base = 0; - id->gds_size = 0; - id->gws_base = 0; - id->gws_size = 0; - id->oa_base = 0; - id->oa_size = 0; -} - -/** - * amdgpu_vm_reset_all_id - reset VMID to zero - * - * @adev: amdgpu device structure - * - * Reset VMID to force flush on next use - */ -void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev) -{ - unsigned i, j; - - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vm_id_manager *id_mgr = - &adev->vm_manager.id_mgr[i]; - - for (j = 1; j < id_mgr->num_ids; ++j) - amdgpu_vm_reset_id(adev, i, j); - } -} - -/** * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo * * @vm: requested vm @@ -1043,162 +700,52 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /* - * amdgpu_vm_update_level - update a single level in the hierarchy + * amdgpu_vm_update_pde - update a single level in the hierarchy * - * @adev: amdgpu_device pointer + * @param: parameters for the update * @vm: requested vm * @parent: parent directory + * @entry: entry to update * - * Makes sure all entries in @parent are up to date. - * Returns 0 for success, error for failure. + * Makes sure the requested entry in parent is up to date. */ -static int amdgpu_vm_update_level(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent) +static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + struct amdgpu_vm_pt *entry) { - struct amdgpu_bo *shadow; - struct amdgpu_ring *ring = NULL; + struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; uint64_t pd_addr, shadow_addr = 0; - uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; - unsigned count = 0, pt_idx, ndw = 0; - struct amdgpu_job *job; - struct amdgpu_pte_update_params params; - struct dma_fence *fence = NULL; - uint32_t incr; - - int r; - - if (!parent->entries) - return 0; + uint64_t pde, pt, flags; + unsigned level; - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - shadow = parent->base.bo->shadow; + /* Don't update huge pages here */ + if (entry->huge) + return; if (vm->use_cpu_for_update) { pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); - if (unlikely(r)) - return r; - - params.func = amdgpu_vm_cpu_set_ptes; } else { - ring = container_of(vm->entity.sched, struct amdgpu_ring, - sched); - - /* padding, etc. 
*/ - ndw = 64; - - /* assume the worst case */ - ndw += parent->last_entry_used * 6; - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - - if (shadow) { + shadow = parent->base.bo->shadow; + if (shadow) shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { - shadow_addr = 0; - } - - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; - - params.ib = &job->ibs[0]; - params.func = amdgpu_vm_do_set_ptes; - } - - - /* walk over the address space and update the directory */ - for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { - struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - struct amdgpu_bo *bo = entry->base.bo; - uint64_t pde, pt; - - if (bo == NULL) - continue; - - spin_lock(&vm->status_lock); - list_del_init(&entry->base.vm_status); - spin_unlock(&vm->status_lock); - - pt = amdgpu_bo_gpu_offset(bo); - pt = amdgpu_gart_get_vm_pde(adev, pt); - /* Don't update huge pages here */ - if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || - parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) - continue; - - parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; - - pde = pd_addr + pt_idx * 8; - incr = amdgpu_bo_size(bo); - if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt) || - (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { - - if (count) { - if (shadow) - params.func(¶ms, - last_shadow, - last_pt, count, - incr, - AMDGPU_PTE_VALID); - - params.func(¶ms, last_pde, - last_pt, count, incr, - AMDGPU_PTE_VALID); - } - - count = 1; - last_pde = pde; - last_shadow = shadow_addr + pt_idx * 8; - last_pt = pt; - } else { - ++count; - } - } - - if (count) { - if (vm->root.base.bo->shadow) - params.func(¶ms, last_shadow, last_pt, - count, incr, AMDGPU_PTE_VALID); - - params.func(¶ms, last_pde, last_pt, - count, incr, AMDGPU_PTE_VALID); } - if (!vm->use_cpu_for_update) { - if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, - parent->base.bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - if (shadow) - amdgpu_sync_resv(adev, &job->sync, - shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &fence); - if (r) - goto error_free; + for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) + pbo = pbo->parent; - amdgpu_bo_fence(parent->base.bo, fence, true); - dma_fence_put(vm->last_update); - vm->last_update = fence; - } + level += params->adev->vm_manager.root_level; + pt = amdgpu_bo_gpu_offset(bo); + flags = AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); + if (shadow) { + pde = shadow_addr + (entry - parent->entries) * 8; + params->func(params, pde, pt, 1, 0, flags); } - return 0; - -error_free: - amdgpu_job_free(job); - return r; + pde = pd_addr + (entry - parent->entries) * 8; + params->func(params, pde, pt, 1, 0, flags); } /* @@ -1208,27 +755,29 @@ error_free: * * Mark all PD level as invalid after an error. */ -static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent) +static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + unsigned level) { - unsigned pt_idx; + unsigned pt_idx, num_entries; /* * Recurse into the subdirectories. This recursion is harmless because * we only have a maximum of 5 layers. 
*/ - for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + num_entries = amdgpu_vm_num_entries(adev, level); + for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) { struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; if (!entry->base.bo) continue; - entry->addr = ~0ULL; spin_lock(&vm->status_lock); if (list_empty(&entry->base.vm_status)) list_add(&entry->base.vm_status, &vm->relocated); spin_unlock(&vm->status_lock); - amdgpu_vm_invalidate_level(vm, entry); + amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); } } @@ -1244,38 +793,63 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { + struct amdgpu_pte_update_params params; + struct amdgpu_job *job; + unsigned ndw = 0; int r = 0; + if (list_empty(&vm->relocated)) + return 0; + +restart: + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + + if (vm->use_cpu_for_update) { + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); + if (unlikely(r)) + return r; + + params.func = amdgpu_vm_cpu_set_ptes; + } else { + ndw = 512 * 8; + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); + if (r) + return r; + + params.ib = &job->ibs[0]; + params.func = amdgpu_vm_do_set_ptes; + } + spin_lock(&vm->status_lock); while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *bo_base; + struct amdgpu_vm_bo_base *bo_base, *parent; + struct amdgpu_vm_pt *pt, *entry; struct amdgpu_bo *bo; bo_base = list_first_entry(&vm->relocated, struct amdgpu_vm_bo_base, vm_status); + list_del_init(&bo_base->vm_status); spin_unlock(&vm->status_lock); bo = bo_base->bo->parent; - if (bo) { - struct amdgpu_vm_bo_base *parent; - struct amdgpu_vm_pt *pt; - - parent = list_first_entry(&bo->va, - struct amdgpu_vm_bo_base, - bo_list); - pt = container_of(parent, struct amdgpu_vm_pt, base); - - r = amdgpu_vm_update_level(adev, vm, pt); - if (r) { - amdgpu_vm_invalidate_level(vm, &vm->root); - return r; - } - spin_lock(&vm->status_lock); - } else { + if (!bo) { spin_lock(&vm->status_lock); - list_del_init(&bo_base->vm_status); + continue; } + + parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, + bo_list); + pt = container_of(parent, struct amdgpu_vm_pt, base); + entry = container_of(bo_base, struct amdgpu_vm_pt, base); + + amdgpu_vm_update_pde(¶ms, vm, pt, entry); + + spin_lock(&vm->status_lock); + if (!vm->use_cpu_for_update && + (ndw - params.ib->length_dw) < 32) + break; } spin_unlock(&vm->status_lock); @@ -1283,8 +857,44 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, /* Flush HDP */ mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); + } else if (params.ib->length_dw == 0) { + amdgpu_job_free(job); + } else { + struct amdgpu_bo *root = vm->root.base.bo; + struct amdgpu_ring *ring; + struct dma_fence *fence; + + ring = container_of(vm->entity.sched, struct amdgpu_ring, + sched); + + amdgpu_ring_pad_ib(ring, params.ib); + amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, + AMDGPU_FENCE_OWNER_VM, false); + if (root->shadow) + amdgpu_sync_resv(adev, &job->sync, + root->shadow->tbo.resv, + AMDGPU_FENCE_OWNER_VM, false); + + WARN_ON(params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); + if (r) + goto error; + + amdgpu_bo_fence(root, fence, true); + dma_fence_put(vm->last_update); + vm->last_update = fence; } + if (!list_empty(&vm->relocated)) + goto restart; + + return 0; + +error: + amdgpu_vm_invalidate_level(adev, vm, &vm->root, + adev->vm_manager.root_level); + amdgpu_job_free(job); 
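	/* Every PD/PT level was re-marked as relocated just above, so the
	 * next amdgpu_vm_update_directories() pass rebuilds the directory
	 * entries instead of trusting a partially written update. */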
return r; } @@ -1302,18 +912,19 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, struct amdgpu_vm_pt **entry, struct amdgpu_vm_pt **parent) { - unsigned idx, level = p->adev->vm_manager.num_level; + unsigned level = p->adev->vm_manager.root_level; *parent = NULL; *entry = &p->vm->root; while ((*entry)->entries) { - idx = addr >> (p->adev->vm_manager.block_size * level--); - idx %= amdgpu_bo_size((*entry)->base.bo) / 8; + unsigned shift = amdgpu_vm_level_shift(p->adev, level++); + *parent = *entry; - *entry = &(*entry)->entries[idx]; + *entry = &(*entry)->entries[addr >> shift]; + addr &= (1ULL << shift) - 1; } - if (level) + if (level != AMDGPU_VM_PTB) *entry = NULL; } @@ -1335,56 +946,42 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, unsigned nptes, uint64_t dst, uint64_t flags) { - bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); uint64_t pd_addr, pde; /* In the case of a mixed PT the PDE must point to it*/ - if (p->adev->asic_type < CHIP_VEGA10 || - nptes != AMDGPU_VM_PTE_COUNT(p->adev) || - p->src || - !(flags & AMDGPU_PTE_VALID)) { - - dst = amdgpu_bo_gpu_offset(entry->base.bo); - dst = amdgpu_gart_get_vm_pde(p->adev, dst); - flags = AMDGPU_PTE_VALID; - } else { + if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && + nptes == AMDGPU_VM_PTE_COUNT(p->adev)) { /* Set the huge page flag to stop scanning at this PDE */ flags |= AMDGPU_PDE_PTE; } - if (entry->addr == (dst | flags)) + if (!(flags & AMDGPU_PDE_PTE)) { + if (entry->huge) { + /* Add the entry to the relocated list to update it. */ + entry->huge = false; + spin_lock(&p->vm->status_lock); + list_move(&entry->base.vm_status, &p->vm->relocated); + spin_unlock(&p->vm->status_lock); + } return; + } - entry->addr = (dst | flags); - - if (use_cpu_update) { - /* In case a huge page is replaced with a system - * memory mapping, p->pages_addr != NULL and - * amdgpu_vm_cpu_set_ptes would try to translate dst - * through amdgpu_vm_map_gart. But dst is already a - * GPU address (of the page table). Disable - * amdgpu_vm_map_gart temporarily. - */ - dma_addr_t *tmp; - - tmp = p->pages_addr; - p->pages_addr = NULL; + entry->huge = true; + amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0, + &dst, &flags); + if (p->func == amdgpu_vm_cpu_set_ptes) { pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - pde = pd_addr + (entry - parent->entries) * 8; - amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); - - p->pages_addr = tmp; } else { if (parent->base.bo->shadow) { pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); pde = pd_addr + (entry - parent->entries) * 8; - amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + p->func(p, pde, dst, 1, 0, flags); } pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - pde = pd_addr + (entry - parent->entries) * 8; - amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); } + pde = pd_addr + (entry - parent->entries) * 8; + p->func(p, pde, dst, 1, 0, flags); } /** @@ -1429,7 +1026,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, amdgpu_vm_handle_huge_pages(params, entry, parent, nptes, dst, flags); /* We don't need to update PTEs for huge pages */ - if (entry->addr & AMDGPU_PDE_PTE) + if (entry->huge) continue; pt = entry->base.bo; @@ -1588,14 +1185,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, * * The second command is for the shadow pagetables. 
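 *
 * As a sketch of the estimate, with illustrative values: for
 * nptes = 2048 and a 9-bit block size, (2048 >> 9) + 1 = 5 commands
 * are reserved, doubled to 10 when a shadow page table exists.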
*/ - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; + if (vm->root.base.bo->shadow) + ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; + else + ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); /* padding, etc. */ ndw = 64; - /* one PDE write for each huge page */ - ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; - if (pages_addr) { /* copy commands needed */ ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; @@ -1639,7 +1236,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, addr = 0; } - r = amdgpu_sync_fence(adev, &job->sync, exclusive); + r = amdgpu_sync_fence(adev, &job->sync, exclusive, false); if (r) goto error_free; @@ -1670,7 +1267,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, error_free: amdgpu_job_free(job); - amdgpu_vm_invalidate_level(vm, &vm->root); return r; } @@ -2081,18 +1677,31 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_lock(&vm->status_lock); while (!list_empty(&vm->moved)) { struct amdgpu_bo_va *bo_va; + struct reservation_object *resv; bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); + resv = bo_va->base.bo->tbo.resv; + /* Per VM BOs never need to bo cleared in the page tables */ - clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv; + if (resv == vm->root.base.bo->tbo.resv) + clear = false; + /* Try to reserve the BO to avoid clearing its ptes */ + else if (!amdgpu_vm_debug && reservation_object_trylock(resv)) + clear = false; + /* Somebody else is using the BO right now */ + else + clear = true; r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; + if (!clear && resv != vm->root.base.bo->tbo.resv) + reservation_object_unlock(resv); + spin_lock(&vm->status_lock); } spin_unlock(&vm->status_lock); @@ -2132,8 +1741,26 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - if (bo) - list_add_tail(&bo_va->base.bo_list, &bo->va); + if (!bo) + return bo_va; + + list_add_tail(&bo_va->base.bo_list, &bo->va); + + if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + return bo_va; + + if (bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + return bo_va; + + /* + * We checked all the prerequisites, but it looks like this per VM BO + * is currently evicted. add the BO to the evicted list to make sure it + * is validated on next VM use to avoid fault. 
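 * (The resv comparison above is what identifies a per-VM BO, and a
 * current placement outside bo->preferred_domains is what marks it
 * as evicted.)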
+ * */ + spin_lock(&vm->status_lock); + list_move_tail(&bo_va->base.vm_status, &vm->evicted); + spin_unlock(&vm->status_lock); return bo_va; } @@ -2556,47 +2183,69 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) } /** - * amdgpu_vm_set_fragment_size - adjust fragment size in PTE - * - * @adev: amdgpu_device pointer - * @fragment_size_default: the default fragment size if it's set auto - */ -void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, - uint32_t fragment_size_default) -{ - if (amdgpu_vm_fragment_size == -1) - adev->vm_manager.fragment_size = fragment_size_default; - else - adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; -} - -/** * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size * * @adev: amdgpu_device pointer * @vm_size: the default vm size if it's set auto */ -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, - uint32_t fragment_size_default) +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, + uint32_t fragment_size_default, unsigned max_level, + unsigned max_bits) { - /* adjust vm size firstly */ - if (amdgpu_vm_size == -1) - adev->vm_manager.vm_size = vm_size; - else - adev->vm_manager.vm_size = amdgpu_vm_size; + uint64_t tmp; + + /* adjust vm size first */ + if (amdgpu_vm_size != -1) { + unsigned max_size = 1 << (max_bits - 30); - /* block size depends on vm size */ - if (amdgpu_vm_block_size == -1) + vm_size = amdgpu_vm_size; + if (vm_size > max_size) { + dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n", + amdgpu_vm_size, max_size); + vm_size = max_size; + } + } + + adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; + + tmp = roundup_pow_of_two(adev->vm_manager.max_pfn); + if (amdgpu_vm_block_size != -1) + tmp >>= amdgpu_vm_block_size - 9; + tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; + adev->vm_manager.num_level = min(max_level, (unsigned)tmp); + switch (adev->vm_manager.num_level) { + case 3: + adev->vm_manager.root_level = AMDGPU_VM_PDB2; + break; + case 2: + adev->vm_manager.root_level = AMDGPU_VM_PDB1; + break; + case 1: + adev->vm_manager.root_level = AMDGPU_VM_PDB0; + break; + default: + dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n"); + } + /* block size depends on vm size and hw setup*/ + if (amdgpu_vm_block_size != -1) adev->vm_manager.block_size = - amdgpu_vm_get_block_size(adev->vm_manager.vm_size); + min((unsigned)amdgpu_vm_block_size, max_bits + - AMDGPU_GPU_PAGE_SHIFT + - 9 * adev->vm_manager.num_level); + else if (adev->vm_manager.num_level > 1) + adev->vm_manager.block_size = 9; else - adev->vm_manager.block_size = amdgpu_vm_block_size; + adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp); - amdgpu_vm_set_fragment_size(adev, fragment_size_default); + if (amdgpu_vm_fragment_size == -1) + adev->vm_manager.fragment_size = fragment_size_default; + else + adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; - DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", - adev->vm_manager.vm_size, adev->vm_manager.block_size, - adev->vm_manager.fragment_size); + DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", + vm_size, adev->vm_manager.num_level + 1, + adev->vm_manager.block_size, + adev->vm_manager.fragment_size); } /** @@ -2615,13 +2264,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; struct amdgpu_ring *ring; - struct amd_sched_rq *rq; + struct drm_sched_rq *rq; int r, i; u64 flags; 
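	/* A worked example for amdgpu_vm_adjust_size() above, assuming an
	 * illustrative vm_size of 256 GB with max_level = 3, max_bits = 48
	 * and no module overrides: max_pfn = 256 << 18 = 1 << 26, so
	 * DIV_ROUND_UP(fls64(1ULL << 26) - 1, 9) - 1 = 2 directory levels,
	 * hence root_level = AMDGPU_VM_PDB1 and block_size = 9, reported
	 * as a 3-level layout by the DRM_INFO above. */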
uint64_t init_pde_value = 0; vm->va = RB_ROOT_CACHED; - vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); @@ -2635,9 +2283,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); ring_instance %= adev->vm_manager.vm_pte_num_rings; ring = adev->vm_manager.vm_pte_rings[ring_instance]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; - r = amd_sched_entity_init(&ring->sched, &vm->entity, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; + r = drm_sched_entity_init(&ring->sched, &vm->entity, + rq, amdgpu_sched_jobs, NULL); if (r) return r; @@ -2670,7 +2318,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW); - r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, + r = amdgpu_bo_create(adev, + amdgpu_vm_bo_size(adev, adev->vm_manager.root_level), + align, true, AMDGPU_GEM_DOMAIN_VRAM, flags, NULL, NULL, init_pde_value, &vm->root.base.bo); @@ -2716,7 +2366,7 @@ error_free_root: vm->root.base.bo = NULL; error_free_sched_entity: - amd_sched_entity_fini(&ring->sched, &vm->entity); + drm_sched_entity_fini(&ring->sched, &vm->entity); return r; } @@ -2724,26 +2374,31 @@ error_free_sched_entity: /** * amdgpu_vm_free_levels - free PD/PT levels * - * @level: PD/PT starting level to free + * @adev: amdgpu device structure + * @parent: PD/PT starting level to free + * @level: level of parent structure * * Free the page directory or page table level and all sub levels. */ -static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) +static void amdgpu_vm_free_levels(struct amdgpu_device *adev, + struct amdgpu_vm_pt *parent, + unsigned level) { - unsigned i; + unsigned i, num_entries = amdgpu_vm_num_entries(adev, level); - if (level->base.bo) { - list_del(&level->base.bo_list); - list_del(&level->base.vm_status); - amdgpu_bo_unref(&level->base.bo->shadow); - amdgpu_bo_unref(&level->base.bo); + if (parent->base.bo) { + list_del(&parent->base.bo_list); + list_del(&parent->base.vm_status); + amdgpu_bo_unref(&parent->base.bo->shadow); + amdgpu_bo_unref(&parent->base.bo); } - if (level->entries) - for (i = 0; i <= level->last_entry_used; i++) - amdgpu_vm_free_levels(&level->entries[i]); + if (parent->entries) + for (i = 0; i < num_entries; i++) + amdgpu_vm_free_levels(adev, &parent->entries[i], + level + 1); - kvfree(level->entries); + kvfree(parent->entries); } /** @@ -2775,7 +2430,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - amd_sched_entity_fini(vm->entity.sched, &vm->entity); + drm_sched_entity_fini(vm->entity.sched, &vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); @@ -2801,13 +2456,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) { dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); } else { - amdgpu_vm_free_levels(&vm->root); + amdgpu_vm_free_levels(adev, &vm->root, + adev->vm_manager.root_level); amdgpu_bo_unreserve(root); } amdgpu_bo_unref(&root); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) - amdgpu_vm_free_reserved_vmid(adev, vm, i); + amdgpu_vmid_free_reserved(adev, vm, i); } /** @@ -2826,17 +2482,21 @@ bool 
amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, spin_lock(&adev->vm_manager.pasid_lock); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - spin_unlock(&adev->vm_manager.pasid_lock); - if (!vm) + if (!vm) { /* VM not found, can't track fault credit */ + spin_unlock(&adev->vm_manager.pasid_lock); return true; + } /* No lock needed. only accessed by IRQ handler */ - if (!vm->fault_credit) + if (!vm->fault_credit) { /* Too many faults in this VM */ + spin_unlock(&adev->vm_manager.pasid_lock); return false; + } vm->fault_credit--; + spin_unlock(&adev->vm_manager.pasid_lock); return true; } @@ -2849,23 +2509,9 @@ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, */ void amdgpu_vm_manager_init(struct amdgpu_device *adev) { - unsigned i, j; - - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vm_id_manager *id_mgr = - &adev->vm_manager.id_mgr[i]; - - mutex_init(&id_mgr->lock); - INIT_LIST_HEAD(&id_mgr->ids_lru); - atomic_set(&id_mgr->reserved_vmid_num, 0); + unsigned i; - /* skip over VMID 0, since it is the system VM */ - for (j = 1; j < id_mgr->num_ids; ++j) { - amdgpu_vm_reset_id(adev, i, j); - amdgpu_sync_create(&id_mgr->ids[i].active); - list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); - } - } + amdgpu_vmid_mgr_init(adev); adev->vm_manager.fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); @@ -2873,7 +2519,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->vm_manager.seqno[i] = 0; atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); - atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); @@ -2906,24 +2551,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { - unsigned i, j; - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); idr_destroy(&adev->vm_manager.pasid_idr); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vm_id_manager *id_mgr = - &adev->vm_manager.id_mgr[i]; - - mutex_destroy(&id_mgr->lock); - for (j = 0; j < AMDGPU_NUM_VM; ++j) { - struct amdgpu_vm_id *id = &id_mgr->ids[j]; - - amdgpu_sync_free(&id->active); - dma_fence_put(id->flushed_updates); - dma_fence_put(id->last_flush); - } - } + amdgpu_vmid_mgr_fini(adev); } int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -2936,13 +2567,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* current, we only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, - AMDGPU_GFXHUB); + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index bae77353447b..21a80f1bb2b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -24,12 +24,14 @@ #ifndef __AMDGPU_VM_H__ #define __AMDGPU_VM_H__ -#include <linux/rbtree.h> #include <linux/idr.h> +#include <linux/kfifo.h> +#include <linux/rbtree.h> +#include <drm/gpu_scheduler.h> -#include "gpu_scheduler.h" #include "amdgpu_sync.h" #include "amdgpu_ring.h" +#include "amdgpu_ids.h" struct amdgpu_bo_va; struct amdgpu_job; @@ -39,9 +41,6 @@ struct 
amdgpu_bo_list_entry; * GPUVM handling */ -/* maximum number of VMIDs */ -#define AMDGPU_NUM_VM 16 - /* Maximum number of PTEs the hardware can write with one command */ #define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF @@ -69,6 +68,12 @@ struct amdgpu_bo_list_entry; /* PDE is handled as PTE for VEGA10 */ #define AMDGPU_PDE_PTE (1ULL << 54) +/* PTE is handled as PDE for VEGA10 (Translate Further) */ +#define AMDGPU_PTE_TF (1ULL << 56) + +/* PDE Block Fragment Size for VEGA10 */ +#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) + /* VEGA10 only */ #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) @@ -96,6 +101,19 @@ struct amdgpu_bo_list_entry; /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (8ULL << 20) +/* VA hole for 48bit addresses on Vega10 */ +#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL +#define AMDGPU_VA_HOLE_END 0xffff800000000000ULL + +/* + * Hardware is programmed as if the hole doesn't exists with start and end + * address values. + * + * This mask is used to remove the upper 16bits of the VA and so come up with + * the linear addr value. + */ +#define AMDGPU_VA_HOLE_MASK 0x0000ffffffffffffULL + /* max vmids dedicated for process */ #define AMDGPU_VM_MAX_RESERVED_VMID 1 @@ -106,6 +124,16 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) +/* VMPT level enumerate, and the hiberachy is: + * PDB2->PDB1->PDB0->PTB + */ +enum amdgpu_vm_level { + AMDGPU_VM_PDB2, + AMDGPU_VM_PDB1, + AMDGPU_VM_PDB0, + AMDGPU_VM_PTB +}; + /* base structure for tracking BO usage in a VM */ struct amdgpu_vm_bo_base { /* constant after initialization */ @@ -124,11 +152,10 @@ struct amdgpu_vm_bo_base { struct amdgpu_vm_pt { struct amdgpu_vm_bo_base base; - uint64_t addr; + bool huge; /* array of page tables, one for each directory entry */ struct amdgpu_vm_pt *entries; - unsigned last_entry_used; }; #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) @@ -162,13 +189,11 @@ struct amdgpu_vm { spinlock_t freed_lock; /* Scheduler entity for page table updates */ - struct amd_sched_entity entity; + struct drm_sched_entity entity; - /* client id and PASID (TODO: replace client_id with PASID) */ - u64 client_id; unsigned int pasid; /* dedicated to vm */ - struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; @@ -183,37 +208,9 @@ struct amdgpu_vm { unsigned int fault_credit; }; -struct amdgpu_vm_id { - struct list_head list; - struct amdgpu_sync active; - struct dma_fence *last_flush; - atomic64_t owner; - - uint64_t pd_gpu_addr; - /* last flushed PD/PT update */ - struct dma_fence *flushed_updates; - - uint32_t current_gpu_reset_count; - - uint32_t gds_base; - uint32_t gds_size; - uint32_t gws_base; - uint32_t gws_size; - uint32_t oa_base; - uint32_t oa_size; -}; - -struct amdgpu_vm_id_manager { - struct mutex lock; - unsigned num_ids; - struct list_head ids_lru; - struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; - atomic_t reserved_vmid_num; -}; - struct amdgpu_vm_manager { /* Handling of VMIDs */ - struct amdgpu_vm_id_manager id_mgr[AMDGPU_MAX_VMHUBS]; + struct amdgpu_vmid_mgr id_mgr[AMDGPU_MAX_VMHUBS]; /* Handling of VM fences */ u64 fence_context; @@ -221,9 +218,9 @@ struct amdgpu_vm_manager { uint64_t max_pfn; uint32_t num_level; - uint64_t vm_size; uint32_t block_size; uint32_t fragment_size; + enum amdgpu_vm_level 
root_level; /* vram base address for page table entry */ u64 vram_base_offset; /* vm pte handling */ @@ -231,8 +228,6 @@ struct amdgpu_vm_manager { struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; unsigned vm_pte_num_rings; atomic_t vm_pte_next_ring; - /* client id counter */ - atomic64_t client_counter; /* partial resident texture handling */ spinlock_t prt_lock; @@ -251,8 +246,6 @@ struct amdgpu_vm_manager { spinlock_t pasid_lock; }; -int amdgpu_vm_alloc_pasid(unsigned int bits); -void amdgpu_vm_free_pasid(unsigned int pasid); void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -270,13 +263,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size); -int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); -void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub, - unsigned vmid); -void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev); int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, @@ -312,10 +299,9 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, uint64_t addr); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); -void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, - uint32_t fragment_size_default); -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, - uint32_t fragment_size_default); +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, + uint32_t fragment_size_default, unsigned max_level, + unsigned max_bits); int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job); diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index b374653bd6cf..f9b2ce9a98f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -65,8 +65,15 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan, args.ucRegIndex = buf[0]; if (num) num--; - if (num) - memcpy(&out, &buf[1], num); + if (num) { + if (buf) { + memcpy(&out, &buf[1], num); + } else { + DRM_ERROR("hw i2c: missing buf with num > 1\n"); + r = -EINVAL; + goto done; + } + } args.lpI2CDataOut = cpu_to_le16(out); } else { if (num > ATOM_MAX_HW_I2C_READ) { diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 68b505c768ad..a0943aa8d1d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -891,12 +891,12 @@ static void ci_dpm_powergate_uvd(void *handle, bool gate) if (gate) { /* stop the UVD block */ - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); ci_update_uvd_dpm(adev, gate); } else { - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_UNGATE); 
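	/* (One instance of this series' wider rename: helpers operating on
	 * IP blocks move under the amdgpu_device_* prefix, as with
	 * amdgpu_device_ip_get_ip_block() and
	 * amdgpu_device_program_register_sequence() elsewhere in this diff.) */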
ci_update_uvd_dpm(adev, gate); } } @@ -4540,9 +4540,9 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev, ((temp_reg & 0xffff0000)) | ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); } j++; + if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL; - temp_reg = RREG32(mmMC_PMG_CMD_MRS); table->mc_reg_address[j].s1 = mmMC_PMG_CMD_MRS; table->mc_reg_address[j].s0 = mmMC_SEQ_PMG_CMD_MRS_LP; @@ -4553,10 +4553,10 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev, table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { + if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD; for (k = 0; k < table->num_entries; k++) { @@ -4564,8 +4564,6 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev, (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; } j++; - if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; } break; case mmMC_SEQ_RESERVE_M: @@ -4577,8 +4575,6 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev, (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); } j++; - if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; break; default: break; @@ -6625,9 +6621,9 @@ static int ci_dpm_print_clock_levels(void *handle, for (i = 0; i < pcie_table->count; i++) size += sprintf(buf + size, "%d: %s %s\n", i, - (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" : - (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" : - (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "", + (pcie_table->dpm_levels[i].value == 0) ? "2.5GT/s, x1" : + (pcie_table->dpm_levels[i].value == 1) ? "5.0GT/s, x16" : + (pcie_table->dpm_levels[i].value == 2) ? "8.0GT/s, x16" : "", (i == now) ? 
"*" : ""); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index a296f7bbe57c..8e59e65efd44 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -755,74 +755,74 @@ static void cik_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_BONAIRE: - amdgpu_program_register_sequence(adev, - bonaire_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - bonaire_golden_registers, - (const u32)ARRAY_SIZE(bonaire_golden_registers)); - amdgpu_program_register_sequence(adev, - bonaire_golden_common_registers, - (const u32)ARRAY_SIZE(bonaire_golden_common_registers)); - amdgpu_program_register_sequence(adev, - bonaire_golden_spm_registers, - (const u32)ARRAY_SIZE(bonaire_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + bonaire_mgcg_cgcg_init, + ARRAY_SIZE(bonaire_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + bonaire_golden_registers, + ARRAY_SIZE(bonaire_golden_registers)); + amdgpu_device_program_register_sequence(adev, + bonaire_golden_common_registers, + ARRAY_SIZE(bonaire_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + bonaire_golden_spm_registers, + ARRAY_SIZE(bonaire_golden_spm_registers)); break; case CHIP_KABINI: - amdgpu_program_register_sequence(adev, - kalindi_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - kalindi_golden_registers, - (const u32)ARRAY_SIZE(kalindi_golden_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_common_registers, - (const u32)ARRAY_SIZE(kalindi_golden_common_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_spm_registers, - (const u32)ARRAY_SIZE(kalindi_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_mgcg_cgcg_init, + ARRAY_SIZE(kalindi_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_registers, + ARRAY_SIZE(kalindi_golden_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_common_registers, + ARRAY_SIZE(kalindi_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_spm_registers, + ARRAY_SIZE(kalindi_golden_spm_registers)); break; case CHIP_MULLINS: - amdgpu_program_register_sequence(adev, - kalindi_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - godavari_golden_registers, - (const u32)ARRAY_SIZE(godavari_golden_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_common_registers, - (const u32)ARRAY_SIZE(kalindi_golden_common_registers)); - amdgpu_program_register_sequence(adev, - kalindi_golden_spm_registers, - (const u32)ARRAY_SIZE(kalindi_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_mgcg_cgcg_init, + ARRAY_SIZE(kalindi_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + godavari_golden_registers, + ARRAY_SIZE(godavari_golden_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_common_registers, + ARRAY_SIZE(kalindi_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + kalindi_golden_spm_registers, + ARRAY_SIZE(kalindi_golden_spm_registers)); break; case CHIP_KAVERI: - amdgpu_program_register_sequence(adev, - spectre_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init)); - 
amdgpu_program_register_sequence(adev, - spectre_golden_registers, - (const u32)ARRAY_SIZE(spectre_golden_registers)); - amdgpu_program_register_sequence(adev, - spectre_golden_common_registers, - (const u32)ARRAY_SIZE(spectre_golden_common_registers)); - amdgpu_program_register_sequence(adev, - spectre_golden_spm_registers, - (const u32)ARRAY_SIZE(spectre_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + spectre_mgcg_cgcg_init, + ARRAY_SIZE(spectre_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + spectre_golden_registers, + ARRAY_SIZE(spectre_golden_registers)); + amdgpu_device_program_register_sequence(adev, + spectre_golden_common_registers, + ARRAY_SIZE(spectre_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + spectre_golden_spm_registers, + ARRAY_SIZE(spectre_golden_spm_registers)); break; case CHIP_HAWAII: - amdgpu_program_register_sequence(adev, - hawaii_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - hawaii_golden_registers, - (const u32)ARRAY_SIZE(hawaii_golden_registers)); - amdgpu_program_register_sequence(adev, - hawaii_golden_common_registers, - (const u32)ARRAY_SIZE(hawaii_golden_common_registers)); - amdgpu_program_register_sequence(adev, - hawaii_golden_spm_registers, - (const u32)ARRAY_SIZE(hawaii_golden_spm_registers)); + amdgpu_device_program_register_sequence(adev, + hawaii_mgcg_cgcg_init, + ARRAY_SIZE(hawaii_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + hawaii_golden_registers, + ARRAY_SIZE(hawaii_golden_registers)); + amdgpu_device_program_register_sequence(adev, + hawaii_golden_common_registers, + ARRAY_SIZE(hawaii_golden_common_registers)); + amdgpu_device_program_register_sequence(adev, + hawaii_golden_spm_registers, + ARRAY_SIZE(hawaii_golden_spm_registers)); break; default: break; @@ -1246,7 +1246,7 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev) /* disable BM */ pci_clear_master(adev->pdev); /* reset */ - amdgpu_pci_config_reset(adev); + amdgpu_device_pci_config_reset(adev); udelay(100); @@ -1866,7 +1866,7 @@ static int cik_common_early_init(void *handle) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_get_pcie_info(adev); + amdgpu_device_get_pcie_info(adev); return 0; } @@ -1974,77 +1974,77 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_BONAIRE: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_2_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block); + amdgpu_device_ip_block_add(adev, 
&gfx_v7_2_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; case CHIP_HAWAII: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_5_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_3_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; case CHIP_KAVERI: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_1_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_1_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; case CHIP_KABINI: case CHIP_MULLINS: - amdgpu_ip_block_add(adev, &cik_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_0_ip_block); - amdgpu_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &cik_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, 
&dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v8_3_ip_block); - amdgpu_ip_block_add(adev, &gfx_v7_2_ip_block); - amdgpu_ip_block_add(adev, &cik_sdma_ip_block); - amdgpu_ip_block_add(adev, &uvd_v4_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v2_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index a870b354e3f7..d5a05c19708f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -280,7 +280,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 60cecd117705..6e8278e689b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -221,9 +221,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 extra_bits = vm_id & 0xf; + u32 extra_bits = vmid & 0xf; /* IB packet must end on a 8 DW boundary */ cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -626,7 +626,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -639,7 +639,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); @@ -657,13 +657,13 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -686,7 +686,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -724,7 +724,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); @@ -735,7 +735,7 @@ err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - 
amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -880,23 +880,23 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (CIK). */ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, pd_addr >> 12); /* flush TLB */ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 6a9e38a3d2a0..cee6e8a3ad9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -562,7 +562,7 @@ #define PRIVATE_BASE(x) ((x) << 0) /* scratch */ #define SHARED_BASE(x) ((x) << 16) /* LDS */ -#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 +#define KFD_CIK_SDMA_QUEUE_OFFSET (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL) /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ enum { diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h index 003a131bad47..567a904804bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h @@ -48,7 +48,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] = 0x00000000, // DB_STENCIL_WRITE_BASE 0x00000000, // DB_STENCIL_WRITE_BASE_HI 0x00000000, // DB_DFSM_CONTROL - 0x00000000, // DB_RENDER_FILTER + 0, // HOLE 0x00000000, // DB_Z_INFO2 0x00000000, // DB_STENCIL_INFO2 0, // HOLE @@ -259,8 +259,8 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] = 0x00000000, // PA_SC_RIGHT_VERT_GRID 0x00000000, // PA_SC_LEFT_VERT_GRID 0x00000000, // PA_SC_HORIZ_GRID - 0x00000000, // PA_SC_FOV_WINDOW_LR - 0x00000000, // PA_SC_FOV_WINDOW_TB + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE @@ -701,7 +701,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP 0x00000000, // VGT_DRAW_PAYLOAD_CNTL - 0x00000000, // VGT_INDEX_PAYLOAD_CNTL + 0, // HOLE 0x00000000, // VGT_INSTANCE_STEP_RATE_0 0x00000000, // VGT_INSTANCE_STEP_RATE_1 0, // HOLE diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index fa61d649bb44..f576e9cbbc61 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -259,7 +259,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 4e519dc42916..f34bc68aadfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -145,20 +145,20 @@ static void dce_v10_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - (const u32)ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); break; default: break; @@ -2773,7 +2773,6 @@ static int dce_v10_0_early_init(void *handle) adev->audio_endpt_wreg = &dce_v10_0_audio_endpt_wreg; dce_v10_0_set_display_funcs(adev); - dce_v10_0_set_irq_funcs(adev); adev->mode_info.num_crtc = dce_v10_0_get_num_crtc(adev); @@ -2788,6 +2787,8 @@ static int dce_v10_0_early_init(void *handle) return -EINVAL; } + dce_v10_0_set_irq_funcs(adev); + return 0; } @@ -3635,13 +3636,16 @@ static const struct amdgpu_irq_src_funcs dce_v10_0_hpd_irq_funcs = { static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST; + if (adev->mode_info.num_crtc > 0) + adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc; + else + adev->crtc_irq.num_types = 0; adev->crtc_irq.funcs = &dce_v10_0_crtc_irq_funcs; - adev->pageflip_irq.num_types = AMDGPU_PAGEFLIP_IRQ_LAST; + adev->pageflip_irq.num_types = adev->mode_info.num_crtc; adev->pageflip_irq.funcs = &dce_v10_0_pageflip_irq_funcs; - adev->hpd_irq.num_types = AMDGPU_HPD_LAST; + adev->hpd_irq.num_types = adev->mode_info.num_hpd; adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 11edc75edaa9..26378bd6aba4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -154,28 +154,28 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - cz_golden_settings_a11, - (const u32)ARRAY_SIZE(cz_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_golden_settings_a11, + ARRAY_SIZE(cz_golden_settings_a11)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_golden_settings_a11, - (const u32)ARRAY_SIZE(stoney_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_settings_a11, + ARRAY_SIZE(stoney_golden_settings_a11)); break; case CHIP_POLARIS11: 
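/* Aside: dce_v10_0_set_irq_funcs() above stops sizing its interrupt
 * sources with ASIC-wide maxima (AMDGPU_CRTC_IRQ_LAST,
 * AMDGPU_PAGEFLIP_IRQ_LAST, AMDGPU_HPD_LAST) and derives the counts from
 * what this chip actually exposes:
 */
	adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 +
				   adev->mode_info.num_crtc;
	adev->pageflip_irq.num_types = adev->mode_info.num_crtc;
	adev->hpd_irq.num_types = adev->mode_info.num_hpd;
/* This is also why early_init now assigns num_crtc before calling
 * set_irq_funcs -- see the reordered hunks above. */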
case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - polaris11_golden_settings_a11, - (const u32)ARRAY_SIZE(polaris11_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + polaris11_golden_settings_a11, + ARRAY_SIZE(polaris11_golden_settings_a11)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - polaris10_golden_settings_a11, - (const u32)ARRAY_SIZE(polaris10_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + polaris10_golden_settings_a11, + ARRAY_SIZE(polaris10_golden_settings_a11)); break; default: break; @@ -2876,7 +2876,6 @@ static int dce_v11_0_early_init(void *handle) adev->audio_endpt_wreg = &dce_v11_0_audio_endpt_wreg; dce_v11_0_set_display_funcs(adev); - dce_v11_0_set_irq_funcs(adev); adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev); @@ -2903,6 +2902,8 @@ static int dce_v11_0_early_init(void *handle) return -EINVAL; } + dce_v11_0_set_irq_funcs(adev); + return 0; } @@ -3759,13 +3760,16 @@ static const struct amdgpu_irq_src_funcs dce_v11_0_hpd_irq_funcs = { static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST; + if (adev->mode_info.num_crtc > 0) + adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc; + else + adev->crtc_irq.num_types = 0; adev->crtc_irq.funcs = &dce_v11_0_crtc_irq_funcs; - adev->pageflip_irq.num_types = AMDGPU_PAGEFLIP_IRQ_LAST; + adev->pageflip_irq.num_types = adev->mode_info.num_crtc; adev->pageflip_irq.funcs = &dce_v11_0_pageflip_irq_funcs; - adev->hpd_irq.num_types = AMDGPU_HPD_LAST; + adev->hpd_irq.num_types = adev->mode_info.num_hpd; adev->hpd_irq.funcs = &dce_v11_0_hpd_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index a51e35f824a1..bd2c4f727df6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2639,7 +2639,6 @@ static int dce_v6_0_early_init(void *handle) adev->audio_endpt_wreg = &dce_v6_0_audio_endpt_wreg; dce_v6_0_set_display_funcs(adev); - dce_v6_0_set_irq_funcs(adev); adev->mode_info.num_crtc = dce_v6_0_get_num_crtc(adev); @@ -2658,6 +2657,8 @@ static int dce_v6_0_early_init(void *handle) return -EINVAL; } + dce_v6_0_set_irq_funcs(adev); + return 0; } @@ -3441,13 +3442,16 @@ static const struct amdgpu_irq_src_funcs dce_v6_0_hpd_irq_funcs = { static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST; + if (adev->mode_info.num_crtc > 0) + adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc; + else + adev->crtc_irq.num_types = 0; adev->crtc_irq.funcs = &dce_v6_0_crtc_irq_funcs; - adev->pageflip_irq.num_types = AMDGPU_PAGEFLIP_IRQ_LAST; + adev->pageflip_irq.num_types = adev->mode_info.num_crtc; adev->pageflip_irq.funcs = &dce_v6_0_pageflip_irq_funcs; - adev->hpd_irq.num_types = AMDGPU_HPD_LAST; + adev->hpd_irq.num_types = adev->mode_info.num_hpd; adev->hpd_irq.funcs = &dce_v6_0_hpd_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 9cf14b8b2db9..c008dc030687 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2664,7 +2664,6 @@ static int dce_v8_0_early_init(void *handle) adev->audio_endpt_wreg = &dce_v8_0_audio_endpt_wreg; dce_v8_0_set_display_funcs(adev); - dce_v8_0_set_irq_funcs(adev); adev->mode_info.num_crtc = dce_v8_0_get_num_crtc(adev); @@ -2688,6 +2687,8 @@ static int dce_v8_0_early_init(void 
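/* Aside: dce_v11_0 and dce_v6_0 above (and dce_v8_0, continuing below) get
 * the same treatment as dce_v10_0: set_irq_funcs() moves from the top of
 * early_init() to after the per-ASIC switch that fills in
 * adev->mode_info.num_crtc and num_hpd, since the IRQ type counts are now
 * computed from those fields. The required ordering, sketched for v11:
 */
	adev->mode_info.num_crtc = dce_v11_0_get_num_crtc(adev);
	/* ... per-ASIC num_hpd and register-offset setup, which may still
	 * bail out with -EINVAL on an unknown chip ... */
	dce_v11_0_set_irq_funcs(adev);	/* safe: the counts are valid here */
	return 0;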
*handle) return -EINVAL; } + dce_v8_0_set_irq_funcs(adev); + return 0; } @@ -3525,13 +3526,16 @@ static const struct amdgpu_irq_src_funcs dce_v8_0_hpd_irq_funcs = { static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_LAST; + if (adev->mode_info.num_crtc > 0) + adev->crtc_irq.num_types = AMDGPU_CRTC_IRQ_VLINE1 + adev->mode_info.num_crtc; + else + adev->crtc_irq.num_types = 0; adev->crtc_irq.funcs = &dce_v8_0_crtc_irq_funcs; - adev->pageflip_irq.num_types = AMDGPU_PAGEFLIP_IRQ_LAST; + adev->pageflip_irq.num_types = adev->mode_info.num_crtc; adev->pageflip_irq.funcs = &dce_v8_0_pageflip_irq_funcs; - adev->hpd_irq.num_types = AMDGPU_HPD_LAST; + adev->hpd_irq.num_types = adev->mode_info.num_hpd; adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index a8829af120c1..120dd3b26fc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -44,6 +44,9 @@ static void dce_virtual_set_display_funcs(struct amdgpu_device *adev); static void dce_virtual_set_irq_funcs(struct amdgpu_device *adev); static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev, int index); +static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, + int crtc, + enum amdgpu_interrupt_state state); /** * dce_virtual_vblank_wait - vblank wait asic callback. @@ -437,6 +440,8 @@ static int dce_virtual_sw_fini(void *handle) drm_kms_helper_poll_fini(adev->ddev); drm_mode_config_cleanup(adev->ddev); + /* clear crtcs pointer to avoid dce irq finish routine access freed data */ + memset(adev->mode_info.crtcs, 0, sizeof(adev->mode_info.crtcs[0]) * AMDGPU_MAX_CRTCS); adev->mode_info.mode_config_initialized = false; return 0; } @@ -489,6 +494,13 @@ static int dce_virtual_hw_init(void *handle) static int dce_virtual_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i = 0; + + for (i = 0; i<adev->mode_info.num_crtc; i++) + if (adev->mode_info.crtcs[i]) + dce_virtual_set_crtc_vblank_interrupt_state(adev, i, AMDGPU_IRQ_STATE_DISABLE); + return 0; } @@ -723,7 +735,7 @@ static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *ad int crtc, enum amdgpu_interrupt_state state) { - if (crtc >= adev->mode_info.num_crtc) { + if (crtc >= adev->mode_info.num_crtc || !adev->mode_info.crtcs[crtc]) { DRM_DEBUG("invalid crtc %d\n", crtc); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index dbbe986f90f2..9870d83b68c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1798,7 +1798,7 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", ring->idx, scratch, tmp); @@ -1874,7 +1874,7 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -1889,7 +1889,7 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id 
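/* Aside: the dce_virtual hunks above close a use-after-free window. After
 * sw_fini() destroys the CRTCs, a still-armed vblank timer could chase a
 * stale adev->mode_info.crtcs[] pointer. The fix is threefold: hw_fini()
 * now disables the timer-backed vblank "interrupt" for every live CRTC,
 * sw_fini() memsets the crtcs[] array, and the state setter refuses
 * entries that are already gone:
 */
	if (crtc >= adev->mode_info.num_crtc || !adev->mode_info.crtcs[crtc]) {
		DRM_DEBUG("invalid crtc %d\n", crtc);
		return;
	}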
<< 24); + control |= ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -1951,7 +1951,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", @@ -2354,7 +2354,7 @@ static void gfx_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) } static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); @@ -2362,10 +2362,10 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(0))); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id )); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid )); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8))); } amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, pd_addr >> 12); @@ -2376,7 +2376,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, WRITE_DATA_DST_SEL(0))); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -2962,25 +2962,7 @@ static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1)); buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); - - switch (adev->asic_type) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - buffer[count++] = cpu_to_le32(0x2a00126a); - break; - case CHIP_VERDE: - buffer[count++] = cpu_to_le32(0x0000124a); - break; - case CHIP_OLAND: - buffer[count++] = cpu_to_le32(0x00000082); - break; - case CHIP_HAINAN: - buffer[count++] = cpu_to_le32(0x00000000); - break; - default: - buffer[count++] = cpu_to_le32(0x00000000); - break; - } + buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 419ba0ce7ee5..a066c5eda135 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -48,6 +48,8 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#define NUM_SIMD_PER_CU 0x4 /* missing from the gfx_7 IP headers */ + #define GFX7_NUM_GFX_RINGS 1 #define GFX7_MEC_HPD_SIZE 2048 @@ -2085,7 +2087,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", ring->idx, scratch, tmp); @@ -2252,7 +2254,7 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, */ static void 
gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -2267,7 +2269,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -2281,9 +2283,9 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, @@ -2365,7 +2367,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", @@ -2551,29 +2553,8 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); - switch (adev->asic_type) { - case CHIP_BONAIRE: - amdgpu_ring_write(ring, 0x16000012); - amdgpu_ring_write(ring, 0x00000000); - break; - case CHIP_KAVERI: - amdgpu_ring_write(ring, 0x00000000); /* XXX */ - amdgpu_ring_write(ring, 0x00000000); - break; - case CHIP_KABINI: - case CHIP_MULLINS: - amdgpu_ring_write(ring, 0x00000000); /* XXX */ - amdgpu_ring_write(ring, 0x00000000); - break; - case CHIP_HAWAII: - amdgpu_ring_write(ring, 0x3a00161a); - amdgpu_ring_write(ring, 0x0000002e); - break; - default: - amdgpu_ring_write(ring, 0x00000000); - amdgpu_ring_write(ring, 0x00000000); - break; - } + amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); + amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); @@ -3258,19 +3239,19 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using the CP (CIK). 
*/ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); - if (vm_id < 8) { + if (vmid < 8) { amdgpu_ring_write(ring, - (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { amdgpu_ring_write(ring, - (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, pd_addr >> 12); @@ -3281,7 +3262,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, WRITE_DATA_DST_SEL(0))); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -5298,6 +5279,11 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + cu_info->max_waves_per_simd = 10; + cu_info->max_scratch_slots_per_cu = 32; + cu_info->wave_front_size = 64; + cu_info->lds_size = 64; } const struct amdgpu_ip_block_version gfx_v7_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 9ecdf621a74a..4e694ae9f308 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -679,55 +679,55 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_iceland_a11, - (const u32)ARRAY_SIZE(golden_settings_iceland_a11)); - amdgpu_program_register_sequence(adev, - iceland_golden_common_all, - (const u32)ARRAY_SIZE(iceland_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_iceland_a11, + ARRAY_SIZE(golden_settings_iceland_a11)); + amdgpu_device_program_register_sequence(adev, + iceland_golden_common_all, + ARRAY_SIZE(iceland_golden_common_all)); break; case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - (const u32)ARRAY_SIZE(golden_settings_fiji_a10)); - amdgpu_program_register_sequence(adev, - fiji_golden_common_all, - (const u32)ARRAY_SIZE(fiji_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_golden_common_all, + ARRAY_SIZE(fiji_golden_common_all)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); - 
amdgpu_program_register_sequence(adev, - tonga_golden_common_all, - (const u32)ARRAY_SIZE(tonga_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_golden_common_all, + ARRAY_SIZE(tonga_golden_common_all)); break; case CHIP_POLARIS11: case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - golden_settings_polaris11_a11, - (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); - amdgpu_program_register_sequence(adev, - polaris11_golden_common_all, - (const u32)ARRAY_SIZE(polaris11_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris11_a11, + ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_device_program_register_sequence(adev, + polaris11_golden_common_all, + ARRAY_SIZE(polaris11_golden_common_all)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - golden_settings_polaris10_a11, - (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); - amdgpu_program_register_sequence(adev, - polaris10_golden_common_all, - (const u32)ARRAY_SIZE(polaris10_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris10_a11, + ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_device_program_register_sequence(adev, + polaris10_golden_common_all, + ARRAY_SIZE(polaris10_golden_common_all)); WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); if (adev->pdev->revision == 0xc7 && ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || @@ -738,26 +738,26 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) } break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - cz_golden_settings_a11, - (const u32)ARRAY_SIZE(cz_golden_settings_a11)); - amdgpu_program_register_sequence(adev, - cz_golden_common_all, - (const u32)ARRAY_SIZE(cz_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_golden_settings_a11, + ARRAY_SIZE(cz_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + cz_golden_common_all, + ARRAY_SIZE(cz_golden_common_all)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - stoney_golden_settings_a11, - (const u32)ARRAY_SIZE(stoney_golden_settings_a11)); - amdgpu_program_register_sequence(adev, - stoney_golden_common_all, - (const u32)ARRAY_SIZE(stoney_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_settings_a11, + ARRAY_SIZE(stoney_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_common_all, + ARRAY_SIZE(stoney_golden_common_all)); break; default: break; @@ -804,7 +804,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test 
failed (scratch(0x%04X)=0x%08X)\n", @@ -856,7 +856,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", @@ -2114,7 +2114,6 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_gfx_compute_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); - amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL); gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); @@ -3851,6 +3850,14 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) break; udelay(1); } + if (k == adev->usec_timeout) { + gfx_v8_0_select_se_sh(adev, 0xffffffff, + 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + DRM_INFO("Timeout wait for RLC serdes %u,%u\n", + i, j); + return; + } } } gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -4305,37 +4312,8 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); - switch (adev->asic_type) { - case CHIP_TONGA: - case CHIP_POLARIS10: - amdgpu_ring_write(ring, 0x16000012); - amdgpu_ring_write(ring, 0x0000002A); - break; - case CHIP_POLARIS11: - case CHIP_POLARIS12: - amdgpu_ring_write(ring, 0x16000012); - amdgpu_ring_write(ring, 0x00000000); - break; - case CHIP_FIJI: - amdgpu_ring_write(ring, 0x3a00161a); - amdgpu_ring_write(ring, 0x0000002e); - break; - case CHIP_CARRIZO: - amdgpu_ring_write(ring, 0x00000002); - amdgpu_ring_write(ring, 0x00000000); - break; - case CHIP_TOPAZ: - amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? 
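/* Aside: gfx_v8_0_wait_for_rlc_serdes() above finally handles its timeout
 * case. Previously a wedged RLC meant silently falling out of the poll
 * loop with grbm_idx_mutex still held and a narrowed SE/SH selection left
 * behind. The added block does the cleanup explicitly:
 */
	if (k == adev->usec_timeout) {
		gfx_v8_0_select_se_sh(adev, 0xffffffff,
				      0xffffffff, 0xffffffff);	/* restore broadcast */
		mutex_unlock(&adev->grbm_idx_mutex);		/* don't leak the lock */
		DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j);
		return;
	}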
- 0x00000000 : 0x00000002); - amdgpu_ring_write(ring, 0x00000000); - break; - case CHIP_STONEY: - amdgpu_ring_write(ring, 0x00000000); - amdgpu_ring_write(ring, 0x00000000); - break; - default: - BUG(); - } + amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); + amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); @@ -4816,7 +4794,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) gfx_v8_0_kiq_setting(ring); - if (adev->in_sriov_reset) { /* for GPU_RESET case */ + if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); @@ -4853,7 +4831,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) struct vi_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->in_sriov_reset && !adev->gfx.in_suspend) { + if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -4865,13 +4843,10 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); - } else if (adev->in_sriov_reset) { /* for GPU_RESET case */ + } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); - /* reset ring buffer */ - ring->wptr = 0; - amdgpu_ring_clear_ring(ring); } else { amdgpu_ring_clear_ring(ring); } @@ -4946,6 +4921,13 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) /* Test KCQs */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; + if (adev->in_gpu_reset) { + /* move reset ring buffer to here to workaround + * compute ring test failed + */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + } ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) @@ -5080,8 +5062,9 @@ static int gfx_v8_0_hw_fini(void *handle) gfx_v8_0_cp_enable(adev, false); gfx_v8_0_rlc_stop(adev); - amdgpu_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); return 0; } @@ -5498,8 +5481,9 @@ static int gfx_v8_0_late_init(void *handle) if (r) return r; - amdgpu_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); return 0; } @@ -5510,10 +5494,10 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade if ((adev->asic_type == CHIP_POLARIS11) || (adev->asic_type == CHIP_POLARIS12)) /* Send msg to SMU via Powerplay */ - amdgpu_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_SMC, - enable ? - AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_SMC, + enable ? + AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 
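/* Aside: two related threads in the gfx_v8 hunks above. First,
 * adev->in_sriov_reset becomes adev->in_gpu_reset: the flag now covers any
 * full GPU reset, not only the SR-IOV flavour, so the MQD save/restore
 * paths key off a single condition. Second, the compute ring-buffer reset
 * moves out of kcq_init_queue() into kiq_resume(), right before the ring
 * test; per the new comment it works around KCQ ring tests failing after a
 * reset:
 */
	if (adev->in_gpu_reset) {
		ring->wptr = 0;			/* restart the ring from scratch */
		amdgpu_ring_clear_ring(ring);
	}
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);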
1 : 0); } @@ -6261,7 +6245,7 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -6270,7 +6254,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vmid << 24); if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); @@ -6291,9 +6275,9 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, @@ -6344,7 +6328,7 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); @@ -6352,12 +6336,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0)) | WR_CONFIRM); - if (vm_id < 8) { + if (vmid < 8) { amdgpu_ring_write(ring, - (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { amdgpu_ring_write(ring, - (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, pd_addr >> 12); @@ -6369,7 +6353,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, WRITE_DATA_DST_SEL(0))); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for the invalidate to complete */ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -6490,10 +6474,10 @@ static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, mutex_unlock(&adev->srbm_mutex); } static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, - enum amd_sched_priority priority) + enum drm_sched_priority priority) { struct amdgpu_device *adev = ring->adev; - bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW; + bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) return; @@ -7132,6 +7116,11 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; + cu_info->max_waves_per_simd = 10; + cu_info->max_scratch_slots_per_cu = 32; + cu_info->wave_front_size = 64; + cu_info->lds_size = 64; } const struct amdgpu_ip_block_version gfx_v8_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5aeb5f8816f3..c06479615e8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -28,11 +28,10 @@ #include "soc15.h" #include "soc15d.h" -#include "vega10/soc15ip.h" -#include "vega10/GC/gc_9_0_offset.h" -#include 
"vega10/GC/gc_9_0_sh_mask.h" -#include "vega10/vega10_enum.h" -#include "vega10/HDP/hdp_4_0_offset.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "vega10_enum.h" +#include "hdp/hdp_4_0_offset.h" #include "soc15_common.h" #include "clearstate_gfx9.h" @@ -65,152 +64,84 @@ MODULE_FIRMWARE("amdgpu/raven_mec.bin"); MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); -static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = -{ - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14) }, - { SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15) } +static const struct soc15_reg_golden golden_settings_gc_9_0[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 
0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) }; -static const u32 golden_settings_gc_9_0[] = -{ - SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, - SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), 0x00001000, 0x00001000, - SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107, - SOC15_REG_OFFSET(GC, 0, mmSQC_CONFIG), 0x03000000, 0x020a2000, - SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, - SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, - SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff, - SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 -}; - -static const u32 golden_settings_gc_9_0_vg10[] = +static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = { - 
SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107, - SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, - SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, - SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800 + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) }; -static const u32 golden_settings_gc_9_1[] = -{ - SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104, - SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, - SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, - SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120, - SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, - SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff, - SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 +static const struct soc15_reg_golden golden_settings_gc_9_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), + 
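/* Aside: the gfx9 golden-register tables above switch from flat u32
 * triples to a typed struct soc15_reg_golden built with
 * SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask). The
 * programming semantics stay read-modify-write: and_mask picks the bits to
 * clear, or_mask the bits to set, and an all-ones and_mask means a plain
 * override. A self-contained sketch of the apply step (RREG32/WREG32
 * standing in for the usual MMIO accessors):
 */
static void apply_golden_setting(u32 reg, u32 and_mask, u32 or_mask)
{
	u32 tmp;

	if (and_mask == 0xffffffff) {	/* full-word override, skip the read */
		tmp = or_mask;
	} else {
		tmp = RREG32(reg);
		tmp &= ~and_mask;	/* clear the masked bits */
		tmp |= or_mask;		/* set the new value */
	}
	WREG32(reg, tmp);
}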
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) }; -static const u32 golden_settings_gc_9_1_rv1[] = +static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = { - SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042, - SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000, - SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000, - SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, - SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800 + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) }; -static const u32 golden_settings_gc_9_x_common[] = +static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = { - SOC15_REG_OFFSET(GC, 0, mmGRBM_CAM_INDEX), 0xffffffff, 0x00000000, - SOC15_REG_OFFSET(GC, 0, mmGRBM_CAM_DATA), 0xffffffff, 0x2544c382 + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382) }; #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 @@ -230,26 +161,26 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_gc_9_0, - (const u32)ARRAY_SIZE(golden_settings_gc_9_0)); - amdgpu_program_register_sequence(adev, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, golden_settings_gc_9_0_vg10, - (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_gc_9_1, - (const u32)ARRAY_SIZE(golden_settings_gc_9_1)); - amdgpu_program_register_sequence(adev, + ARRAY_SIZE(golden_settings_gc_9_1)); + soc15_program_register_sequence(adev, golden_settings_gc_9_1_rv1, - (const u32)ARRAY_SIZE(golden_settings_gc_9_1_rv1)); + ARRAY_SIZE(golden_settings_gc_9_1_rv1)); break; default: break; } - amdgpu_program_register_sequence(adev, golden_settings_gc_9_x_common, + soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); } @@ -327,7 +258,7 @@ static int 
gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", @@ -379,7 +310,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", @@ -1137,8 +1068,8 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; - adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base; - adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; + adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); + adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); /* Primitive Buffer */ r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], @@ -1243,23 +1174,24 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) } gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[0].mem_size, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), (adev->gds.mem.total_size + adev->gfx.ngg.gds_reserve_size) >> AMDGPU_GDS_SHIFT); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_DST_SEL(1) | PACKET3_DMA_DATA_SRC_SEL(2))); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size); - + amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | + adev->gfx.ngg.gds_reserve_size); gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[0].mem_size, 0); + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); amdgpu_ring_commit(ring); @@ -1464,7 +1396,6 @@ static int gfx_v9_0_sw_fini(void *handle) amdgpu_gfx_compute_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); - amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL); gfx_v9_0_mec_fini(adev); gfx_v9_0_ngg_fini(adev); @@ -1596,14 +1527,21 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ - tmp = 0; - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, - SH_MEM_ALIGNMENT_MODE_UNALIGNED); - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + if (i == 0) { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0); + } else { + tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp); + tmp = adev->mc.shared_aperture_start >> 48; + WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp); + } } soc15_grbm_select(adev, 0, 0, 0, 0); @@ -1645,6 +1583,14 @@ 
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) break; udelay(1); } + if (k == adev->usec_timeout) { + gfx_v9_0_select_se_sh(adev, 0xffffffff, + 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + DRM_INFO("Timeout wait for RLC serdes %u,%u\n", + i, j); + return; + } } } gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -2749,7 +2695,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) gfx_v9_0_kiq_setting(ring); - if (adev->in_sriov_reset) { /* for GPU_RESET case */ + if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); @@ -2787,7 +2733,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) struct v9_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->in_sriov_reset && !adev->gfx.in_suspend) { + if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -2799,7 +2745,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - } else if (adev->in_sriov_reset) { /* for GPU_RESET case */ + } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); @@ -3139,6 +3085,8 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size) { + struct amdgpu_device *adev = ring->adev; + gds_base = gds_base >> AMDGPU_GDS_SHIFT; gds_size = gds_size >> AMDGPU_GDS_SHIFT; @@ -3150,22 +3098,22 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, /* GDS Base */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].mem_base, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, gds_base); /* GDS Size */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].mem_size, + SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, gds_size); /* GWS */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].gws, + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); /* OA */ gfx_v9_0_write_data_to_reg(ring, 0, false, - amdgpu_gds_reg_offset[vmid].oa, + SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, (1 << (oa_size + oa_base)) - (1 << oa_base)); } @@ -3610,13 +3558,9 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; u32 ref_and_mask, reg_mem_engine; - const struct nbio_hdp_flush_reg *nbio_hf_reg; - - if (ring->adev->flags & AMD_IS_APU) - nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; - else - nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { switch (ring->me) { @@ -3636,20 +3580,22 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, - nbio_hf_reg->hdp_flush_req_offset, - 
nbio_hf_reg->hdp_flush_done_offset, + adev->nbio_funcs->get_hdp_flush_req_offset(adev), + adev->nbio_funcs->get_hdp_flush_done_offset(adev), ref_and_mask, ref_and_mask, 0x20); } static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + gfx_v9_0_write_data_to_reg(ring, 0, true, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); } static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { u32 header, control = 0; @@ -3658,7 +3604,7 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vmid << 24); if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); @@ -3680,9 +3626,9 @@ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ @@ -3738,22 +3684,23 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) } static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_lo32 + (2 * vm_id), + hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); gfx_v9_0_write_data_to_reg(ring, usepfp, true, - hub->ctx0_ptb_addr_hi32 + (2 * vm_id), + hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); gfx_v9_0_write_data_to_reg(ring, usepfp, true, @@ -3761,7 +3708,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for the invalidate to complete */ gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + - eng, 0, 1 << vm_id, 1 << vm_id, 0x20); + eng, 0, 1 << vmid, 1 << vmid, 0x20); /* compute doesn't have PFP */ if (usepfp) { @@ -3804,6 +3751,8 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned int flags) { + struct amdgpu_device *adev = ring->adev; + /* we only allocate 32bit for each seq wb address */ BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index c17996e18086..56f5fe4e2fee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -23,11 +23,10 @@ #include "amdgpu.h" #include "gfxhub_v1_0.h" -#include "vega10/soc15ip.h" -#include "vega10/GC/gc_9_0_offset.h" -#include "vega10/GC/gc_9_0_sh_mask.h" 
-#include "vega10/GC/gc_9_0_default.h" -#include "vega10/vega10_enum.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "gc/gc_9_0_default.h" +#include "vega10_enum.h" #include "soc15_common.h" @@ -144,8 +143,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + if (adev->mc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); tmp = mmVM_L2_CNTL4_DEFAULT; @@ -183,31 +189,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - int i; + unsigned num_level, block_size; uint32_t tmp; + int i; + + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->mc.translate_further) + num_level -= 1; + else + block_size -= 9; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - adev->vm_manager.num_level); + num_level); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + PAGE_TABLE_BLOCK_SIZE, + block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index f4603a7c8ef3..8e28270d1ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -222,13 +222,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - if (mc->mc_vram_size > 0xFFC0000000ULL) { - dev_warn(adev->dev, "limiting VRAM\n"); - mc->real_vram_size = 0xFFC0000000ULL; - mc->mc_vram_size = 0xFFC0000000ULL; - } - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); } static void gmc_v6_0_mc_program(struct amdgpu_device *adev) @@ -283,6 +278,7 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) u32 tmp; int chansize, numchan; + int r; tmp = RREG32(mmMC_ARB_RAMCFG); if (tmp & (1 << 11)) { @@ -324,12 +320,17 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) break; } adev->mc.vram_width = numchan * chansize; - /* Could aper size report 0 ? */ - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_device_resize_fb_bar(adev); + if (r) + return r; + } + adev->mc.aper_base = pci_resource_start(adev->pdev, 0); + adev->mc.aper_size = pci_resource_len(adev->pdev, 0); adev->mc.visible_vram_size = adev->mc.aper_size; /* set the gart size */ @@ -394,10 +395,10 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - BUG_ON(addr & 0xFFFFFF0000000FFFULL); - return addr; + BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, @@ -831,8 +832,7 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; - amdgpu_vm_adjust_size(adev, 64, 9); - adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; + amdgpu_vm_adjust_size(adev, 64, 9, 1, 40); adev->mc.mc_mask = 0xffffffffffULL; @@ -877,7 +877,6 @@ static int gmc_v6_0_sw_init(void *handle) * amdkfd will use VMIDs 8-15 */ adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ @@ -897,9 +896,9 @@ static int gmc_v6_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); gmc_v6_0_gart_fini(adev); - amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); release_firmware(adev->mc.fw); adev->mc.fw = NULL; @@ -957,7 +956,7 @@ static int gmc_v6_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index b0528ca9207b..86e9d682c59e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -67,12 +67,12 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev) { switch 
(adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_iceland_a11, - (const u32)ARRAY_SIZE(golden_settings_iceland_a11)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_iceland_a11, + ARRAY_SIZE(golden_settings_iceland_a11)); break; default: break; @@ -240,14 +240,8 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - if (mc->mc_vram_size > 0xFFC0000000ULL) { - /* leave room for at least 1024M GTT */ - dev_warn(adev->dev, "limiting VRAM\n"); - mc->real_vram_size = 0xFFC0000000ULL; - mc->mc_vram_size = 0xFFC0000000ULL; - } - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); } /** @@ -322,6 +316,8 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) */ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) { + int r; + adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); if (!adev->mc.vram_width) { u32 tmp; @@ -367,13 +363,18 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) } adev->mc.vram_width = numchan * chansize; } - /* Could aper size report 0 ? */ - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_device_resize_fb_bar(adev); + if (r) + return r; + } + adev->mc.aper_base = pci_resource_start(adev->pdev, 0); + adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + #ifdef CONFIG_X86_64 if (adev->flags & AMD_IS_APU) { adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; @@ -479,10 +480,10 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - BUG_ON(addr & 0xFFFFFF0000000FFFULL); - return addr; + BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } /** @@ -970,8 +971,7 @@ static int gmc_v7_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. 
*/ - amdgpu_vm_adjust_size(adev, 64, 9); - adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; + amdgpu_vm_adjust_size(adev, 64, 9, 1, 40); /* Set the internal MC address mask * This is the max address of the GPU's @@ -1026,7 +1026,6 @@ static int gmc_v7_0_sw_init(void *handle) * amdkfd will use VMIDs 8-15 */ adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ @@ -1046,9 +1045,9 @@ static int gmc_v7_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); gmc_v7_0_gart_fini(adev); - amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); release_firmware(adev->mc.fw); adev->mc.fw = NULL; @@ -1108,7 +1107,7 @@ static int gmc_v7_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index f368cfe2f585..9a813d834f1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -120,44 +120,44 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - (const u32)ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); break; case CHIP_POLARIS11: case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - golden_settings_polaris11_a11, - (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris11_a11, + ARRAY_SIZE(golden_settings_polaris11_a11)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - golden_settings_polaris10_a11, - (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris10_a11, + ARRAY_SIZE(golden_settings_polaris10_a11)); break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_stoney_common, - (const u32)ARRAY_SIZE(golden_settings_stoney_common)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_stoney_common, + 
ARRAY_SIZE(golden_settings_stoney_common)); break; default: break; @@ -405,14 +405,8 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - if (mc->mc_vram_size > 0xFFC0000000ULL) { - /* leave room for at least 1024M GTT */ - dev_warn(adev->dev, "limiting VRAM\n"); - mc->real_vram_size = 0xFFC0000000ULL; - mc->mc_vram_size = 0xFFC0000000ULL; - } - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); } /** @@ -498,6 +492,8 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) */ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) { + int r; + adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); if (!adev->mc.vram_width) { u32 tmp; @@ -543,13 +539,18 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) } adev->mc.vram_width = numchan * chansize; } - /* Could aper size report 0 ? */ - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_device_resize_fb_bar(adev); + if (r) + return r; + } + adev->mc.aper_base = pci_resource_start(adev->pdev, 0); + adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + #ifdef CONFIG_X86_64 if (adev->flags & AMD_IS_APU) { adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; @@ -676,10 +677,10 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - BUG_ON(addr & 0xFFFFFF0000000FFFULL); - return addr; + BUG_ON(*addr & 0xFFFFFF0000000FFFULL); } /** @@ -1067,8 +1068,7 @@ static int gmc_v8_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. 
*/ - amdgpu_vm_adjust_size(adev, 64, 9); - adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; + amdgpu_vm_adjust_size(adev, 64, 9, 1, 40); /* Set the internal MC address mask * This is the max address of the GPU's @@ -1123,7 +1123,6 @@ static int gmc_v8_0_sw_init(void *handle) * amdkfd will use VMIDs 8-15 */ adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ @@ -1143,9 +1142,9 @@ static int gmc_v8_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); gmc_v8_0_gart_fini(adev); - amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); release_firmware(adev->mc.fw); adev->mc.fw = NULL; @@ -1213,7 +1212,7 @@ static int gmc_v8_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c8f1aebeac7a..2719937e09d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -25,20 +25,19 @@ #include "gmc_v9_0.h" #include "amdgpu_atomfirmware.h" -#include "vega10/soc15ip.h" -#include "vega10/HDP/hdp_4_0_offset.h" -#include "vega10/HDP/hdp_4_0_sh_mask.h" -#include "vega10/GC/gc_9_0_sh_mask.h" -#include "vega10/DC/dce_12_0_offset.h" -#include "vega10/DC/dce_12_0_sh_mask.h" -#include "vega10/vega10_enum.h" -#include "vega10/MMHUB/mmhub_1_0_offset.h" -#include "vega10/ATHUB/athub_1_0_offset.h" - +#include "hdp/hdp_4_0_offset.h" +#include "hdp/hdp_4_0_sh_mask.h" +#include "gc/gc_9_0_sh_mask.h" +#include "dce/dce_12_0_offset.h" +#include "dce/dce_12_0_sh_mask.h" +#include "vega10_enum.h" +#include "mmhub/mmhub_1_0_offset.h" +#include "athub/athub_1_0_offset.h" + +#include "soc15.h" #include "soc15_common.h" +#include "umc/umc_6_0_sh_mask.h" -#include "nbio_v6_1.h" -#include "nbio_v7_0.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" @@ -73,16 +72,131 @@ static const u32 golden_settings_vega10_hdp[] = 0xf6e, 0x0fffffff, 0x00000000, }; -static const u32 golden_settings_mmhub_1_0_0[] = +static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = { - SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_WRCLI2), 0x00000007, 0xfe5fe0fa, - SOC15_REG_OFFSET(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0), 0x00000030, 0x55555565 + SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa), + SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565) }; -static const u32 golden_settings_athub_1_0_0[] = +static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = { - SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL), 0x0000ff00, 0x00000800, - SOC15_REG_OFFSET(ATHUB, 0, mmRPB_ARB_CNTL2), 0x00ff00ff, 0x00080008 + SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800), + SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008) +}; + +/* Ecc related register addresses, (BASE + reg offset) */ +/* Universal Memory Controller caps (may be fused). 
*/ +/* UMCCH:UmcLocalCap */ +#define UMCLOCALCAPS_ADDR0 (0x00014306 + 0x00000000) +#define UMCLOCALCAPS_ADDR1 (0x00014306 + 0x00000800) +#define UMCLOCALCAPS_ADDR2 (0x00014306 + 0x00001000) +#define UMCLOCALCAPS_ADDR3 (0x00014306 + 0x00001800) +#define UMCLOCALCAPS_ADDR4 (0x00054306 + 0x00000000) +#define UMCLOCALCAPS_ADDR5 (0x00054306 + 0x00000800) +#define UMCLOCALCAPS_ADDR6 (0x00054306 + 0x00001000) +#define UMCLOCALCAPS_ADDR7 (0x00054306 + 0x00001800) +#define UMCLOCALCAPS_ADDR8 (0x00094306 + 0x00000000) +#define UMCLOCALCAPS_ADDR9 (0x00094306 + 0x00000800) +#define UMCLOCALCAPS_ADDR10 (0x00094306 + 0x00001000) +#define UMCLOCALCAPS_ADDR11 (0x00094306 + 0x00001800) +#define UMCLOCALCAPS_ADDR12 (0x000d4306 + 0x00000000) +#define UMCLOCALCAPS_ADDR13 (0x000d4306 + 0x00000800) +#define UMCLOCALCAPS_ADDR14 (0x000d4306 + 0x00001000) +#define UMCLOCALCAPS_ADDR15 (0x000d4306 + 0x00001800) + +/* Universal Memory Controller Channel config. */ +/* UMCCH:UMC_CONFIG */ +#define UMCCH_UMC_CONFIG_ADDR0 (0x00014040 + 0x00000000) +#define UMCCH_UMC_CONFIG_ADDR1 (0x00014040 + 0x00000800) +#define UMCCH_UMC_CONFIG_ADDR2 (0x00014040 + 0x00001000) +#define UMCCH_UMC_CONFIG_ADDR3 (0x00014040 + 0x00001800) +#define UMCCH_UMC_CONFIG_ADDR4 (0x00054040 + 0x00000000) +#define UMCCH_UMC_CONFIG_ADDR5 (0x00054040 + 0x00000800) +#define UMCCH_UMC_CONFIG_ADDR6 (0x00054040 + 0x00001000) +#define UMCCH_UMC_CONFIG_ADDR7 (0x00054040 + 0x00001800) +#define UMCCH_UMC_CONFIG_ADDR8 (0x00094040 + 0x00000000) +#define UMCCH_UMC_CONFIG_ADDR9 (0x00094040 + 0x00000800) +#define UMCCH_UMC_CONFIG_ADDR10 (0x00094040 + 0x00001000) +#define UMCCH_UMC_CONFIG_ADDR11 (0x00094040 + 0x00001800) +#define UMCCH_UMC_CONFIG_ADDR12 (0x000d4040 + 0x00000000) +#define UMCCH_UMC_CONFIG_ADDR13 (0x000d4040 + 0x00000800) +#define UMCCH_UMC_CONFIG_ADDR14 (0x000d4040 + 0x00001000) +#define UMCCH_UMC_CONFIG_ADDR15 (0x000d4040 + 0x00001800) + +/* Universal Memory Controller Channel Ecc config. 
*/ +/* UMCCH:EccCtrl */ +#define UMCCH_ECCCTRL_ADDR0 (0x00014053 + 0x00000000) +#define UMCCH_ECCCTRL_ADDR1 (0x00014053 + 0x00000800) +#define UMCCH_ECCCTRL_ADDR2 (0x00014053 + 0x00001000) +#define UMCCH_ECCCTRL_ADDR3 (0x00014053 + 0x00001800) +#define UMCCH_ECCCTRL_ADDR4 (0x00054053 + 0x00000000) +#define UMCCH_ECCCTRL_ADDR5 (0x00054053 + 0x00000800) +#define UMCCH_ECCCTRL_ADDR6 (0x00054053 + 0x00001000) +#define UMCCH_ECCCTRL_ADDR7 (0x00054053 + 0x00001800) +#define UMCCH_ECCCTRL_ADDR8 (0x00094053 + 0x00000000) +#define UMCCH_ECCCTRL_ADDR9 (0x00094053 + 0x00000800) +#define UMCCH_ECCCTRL_ADDR10 (0x00094053 + 0x00001000) +#define UMCCH_ECCCTRL_ADDR11 (0x00094053 + 0x00001800) +#define UMCCH_ECCCTRL_ADDR12 (0x000d4053 + 0x00000000) +#define UMCCH_ECCCTRL_ADDR13 (0x000d4053 + 0x00000800) +#define UMCCH_ECCCTRL_ADDR14 (0x000d4053 + 0x00001000) +#define UMCCH_ECCCTRL_ADDR15 (0x000d4053 + 0x00001800) + +static const uint32_t ecc_umclocalcap_addrs[] = { + UMCLOCALCAPS_ADDR0, + UMCLOCALCAPS_ADDR1, + UMCLOCALCAPS_ADDR2, + UMCLOCALCAPS_ADDR3, + UMCLOCALCAPS_ADDR4, + UMCLOCALCAPS_ADDR5, + UMCLOCALCAPS_ADDR6, + UMCLOCALCAPS_ADDR7, + UMCLOCALCAPS_ADDR8, + UMCLOCALCAPS_ADDR9, + UMCLOCALCAPS_ADDR10, + UMCLOCALCAPS_ADDR11, + UMCLOCALCAPS_ADDR12, + UMCLOCALCAPS_ADDR13, + UMCLOCALCAPS_ADDR14, + UMCLOCALCAPS_ADDR15, +}; + +static const uint32_t ecc_umcch_umc_config_addrs[] = { + UMCCH_UMC_CONFIG_ADDR0, + UMCCH_UMC_CONFIG_ADDR1, + UMCCH_UMC_CONFIG_ADDR2, + UMCCH_UMC_CONFIG_ADDR3, + UMCCH_UMC_CONFIG_ADDR4, + UMCCH_UMC_CONFIG_ADDR5, + UMCCH_UMC_CONFIG_ADDR6, + UMCCH_UMC_CONFIG_ADDR7, + UMCCH_UMC_CONFIG_ADDR8, + UMCCH_UMC_CONFIG_ADDR9, + UMCCH_UMC_CONFIG_ADDR10, + UMCCH_UMC_CONFIG_ADDR11, + UMCCH_UMC_CONFIG_ADDR12, + UMCCH_UMC_CONFIG_ADDR13, + UMCCH_UMC_CONFIG_ADDR14, + UMCCH_UMC_CONFIG_ADDR15, +}; + +static const uint32_t ecc_umcch_eccctrl_addrs[] = { + UMCCH_ECCCTRL_ADDR0, + UMCCH_ECCCTRL_ADDR1, + UMCCH_ECCCTRL_ADDR2, + UMCCH_ECCCTRL_ADDR3, + UMCCH_ECCCTRL_ADDR4, + UMCCH_ECCCTRL_ADDR5, + UMCCH_ECCCTRL_ADDR6, + UMCCH_ECCCTRL_ADDR7, + UMCCH_ECCCTRL_ADDR8, + UMCCH_ECCCTRL_ADDR9, + UMCCH_ECCCTRL_ADDR10, + UMCCH_ECCCTRL_ADDR11, + UMCCH_ECCCTRL_ADDR12, + UMCCH_ECCCTRL_ADDR13, + UMCCH_ECCCTRL_ADDR14, + UMCCH_ECCCTRL_ADDR15, }; static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, @@ -134,7 +248,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vm_id_src]; + struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; uint32_t status = 0; u64 addr; @@ -148,9 +262,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (printk_ratelimit()) { dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u)\n", - entry->vm_id_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vm_id, + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n", + entry->vmid_src ? 
"mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vmid, entry->pas_id); dev_err(adev->dev, " at page 0x%016llx from %d\n", addr, entry->client_id); @@ -174,13 +288,13 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; } -static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vm_id) +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) { u32 req = 0; - /* invalidate using legacy mode on vm_id*/ + /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, - PER_VMID_INVALIDATE_REQ, 1 << vm_id); + PER_VMID_INVALIDATE_REQ, 1 << vmid); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); @@ -216,10 +330,7 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, unsigned i, j; /* flush hdp cache */ - if (adev->flags & AMD_IS_APU) - nbio_v7_0_hdp_flush(adev); - else - nbio_v6_1_hdp_flush(adev); + adev->nbio_funcs->hdp_flush(adev); spin_lock(&adev->mc.invalidate_lock); @@ -358,11 +469,28 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr) +static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, + uint64_t *addr, uint64_t *flags) { - addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start; - BUG_ON(addr & 0xFFFF00000000003FULL); - return addr; + if (!(*flags & AMDGPU_PDE_PTE)) + *addr = adev->vm_manager.vram_base_offset + *addr - + adev->mc.vram_start; + BUG_ON(*addr & 0xFFFF00000000003FULL); + + if (!adev->mc.translate_further) + return; + + if (level == AMDGPU_VM_PDB1) { + /* Set the block fragment size */ + if (!(*flags & AMDGPU_PDE_PTE)) + *flags |= AMDGPU_PDE_BFS(0x9); + + } else if (level == AMDGPU_VM_PDB0) { + if (*flags & AMDGPU_PDE_PTE) + *flags &= ~AMDGPU_PDE_PTE; + else + *flags |= AMDGPU_PTE_TF; + } } static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { @@ -386,9 +514,96 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_gart_funcs(adev); gmc_v9_0_set_irq_funcs(adev); + adev->mc.shared_aperture_start = 0x2000000000000000ULL; + adev->mc.shared_aperture_end = + adev->mc.shared_aperture_start + (4ULL << 30) - 1; + adev->mc.private_aperture_start = + adev->mc.shared_aperture_end + 1; + adev->mc.private_aperture_end = + adev->mc.private_aperture_start + (4ULL << 30) - 1; + return 0; } +static int gmc_v9_0_ecc_available(struct amdgpu_device *adev) +{ + uint32_t reg_val; + uint32_t reg_addr; + uint32_t field_val; + size_t i; + uint32_t fv2; + size_t lost_sheep; + + DRM_DEBUG("ecc: gmc_v9_0_ecc_available()\n"); + + lost_sheep = 0; + for (i = 0; i < ARRAY_SIZE(ecc_umclocalcap_addrs); ++i) { + reg_addr = ecc_umclocalcap_addrs[i]; + DRM_DEBUG("ecc: " + "UMCCH_UmcLocalCap[%zu]: reg_addr: 0x%08x\n", + i, reg_addr); + reg_val = RREG32(reg_addr); + field_val = REG_GET_FIELD(reg_val, UMCCH0_0_UmcLocalCap, + EccDis); + DRM_DEBUG("ecc: " + "reg_val: 0x%08x, " + "EccDis: 0x%08x, ", + reg_val, field_val); + if (field_val) { + DRM_ERROR("ecc: UmcLocalCap:EccDis is set.\n"); + ++lost_sheep; + } + } + + for (i = 0; i < ARRAY_SIZE(ecc_umcch_umc_config_addrs); ++i) { + reg_addr = ecc_umcch_umc_config_addrs[i]; + DRM_DEBUG("ecc: " + "UMCCH0_0_UMC_CONFIG[%zu]: reg_addr: 0x%08x", + i, reg_addr); + reg_val = RREG32(reg_addr); + field_val = REG_GET_FIELD(reg_val, 
UMCCH0_0_UMC_CONFIG, + DramReady); + DRM_DEBUG("ecc: " + "reg_val: 0x%08x, " + "DramReady: 0x%08x\n", + reg_val, field_val); + + if (!field_val) { + DRM_ERROR("ecc: UMC_CONFIG:DramReady is not set.\n"); + ++lost_sheep; + } + } + + for (i = 0; i < ARRAY_SIZE(ecc_umcch_eccctrl_addrs); ++i) { + reg_addr = ecc_umcch_eccctrl_addrs[i]; + DRM_DEBUG("ecc: " + "UMCCH_EccCtrl[%zu]: reg_addr: 0x%08x, ", + i, reg_addr); + reg_val = RREG32(reg_addr); + field_val = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, + WrEccEn); + fv2 = REG_GET_FIELD(reg_val, UMCCH0_0_EccCtrl, + RdEccEn); + DRM_DEBUG("ecc: " + "reg_val: 0x%08x, " + "WrEccEn: 0x%08x, " + "RdEccEn: 0x%08x\n", + reg_val, field_val, fv2); + + if (!field_val) { + DRM_DEBUG("ecc: WrEccEn is not set\n"); + ++lost_sheep; + } + if (!fv2) { + DRM_DEBUG("ecc: RdEccEn is not set\n"); + ++lost_sheep; + } + } + + DRM_DEBUG("ecc: lost_sheep: %zu\n", lost_sheep); + return lost_sheep == 0; +} + static int gmc_v9_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -403,6 +618,7 @@ static int gmc_v9_0_late_init(void *handle) */ unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 }; unsigned i; + int r; for(i = 0; i < adev->num_rings; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -418,6 +634,18 @@ static int gmc_v9_0_late_init(void *handle) for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) BUG_ON(vm_inv_eng[i] > 16); + if (adev->asic_type == CHIP_VEGA10) { + r = gmc_v9_0_ecc_available(adev); + if (r == 1) { + DRM_INFO("ECC is active.\n"); + } else if (r == 0) { + DRM_INFO("ECC is not present.\n"); + } else { + DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r); + return r; + } + } + return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); } @@ -427,8 +655,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = 0; if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); - amdgpu_vram_location(adev, &adev->mc, base); - amdgpu_gart_location(adev, mc); + amdgpu_device_vram_location(adev, &adev->mc, base); + amdgpu_device_gart_location(adev, mc); /* base offset of vram pages */ if (adev->flags & AMD_IS_APU) adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); @@ -449,6 +677,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) { u32 tmp; int chansize, numchan; + int r; adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); if (!adev->mc.vram_width) { @@ -491,17 +720,21 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) adev->mc.vram_width = numchan * chansize; } - /* Could aper size report 0 ? */ - adev->mc.aper_base = pci_resource_start(adev->pdev, 0); - adev->mc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->mc.mc_vram_size = - ((adev->flags & AMD_IS_APU) ? 
nbio_v7_0_get_memsize(adev) : - nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL; + adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL; adev->mc.real_vram_size = adev->mc.mc_vram_size; - adev->mc.visible_vram_size = adev->mc.aper_size; + + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_device_resize_fb_bar(adev); + if (r) + return r; + } + adev->mc.aper_base = pci_resource_start(adev->pdev, 0); + adev->mc.aper_size = pci_resource_len(adev->pdev, 0); /* In case the PCI BAR is larger than the actual amount of vram */ + adev->mc.visible_vram_size = adev->mc.aper_size; if (adev->mc.visible_vram_size > adev->mc.real_vram_size) adev->mc.visible_vram_size = adev->mc.real_vram_size; @@ -558,14 +791,12 @@ static int gmc_v9_0_sw_init(void *handle) case CHIP_RAVEN: adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { - adev->vm_manager.vm_size = 1U << 18; - adev->vm_manager.block_size = 9; - adev->vm_manager.num_level = 3; - amdgpu_vm_set_fragment_size(adev, 9); + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { - /* vm_size is 64GB for legacy 2-level page support */ - amdgpu_vm_adjust_size(adev, 64, 9); - adev->vm_manager.num_level = 1; + /* vm_size is 128TB + 512GB for legacy 3-level page support */ + amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48); + adev->mc.translate_further = + adev->vm_manager.num_level > 1; } break; case CHIP_VEGA10: @@ -576,20 +807,12 @@ static int gmc_v9_0_sw_init(void *handle) * vm size is 256TB (48bit), maximum size of Vega10, * block size 512 (9bit) */ - adev->vm_manager.vm_size = 1U << 18; - adev->vm_manager.block_size = 9; - adev->vm_manager.num_level = 3; - amdgpu_vm_set_fragment_size(adev, 9); + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; default: break; } - DRM_INFO("vm size is %llu GB, block size is %u-bit,fragment size is %u-bit\n", - adev->vm_manager.vm_size, - adev->vm_manager.block_size, - adev->vm_manager.fragment_size); - /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, &adev->mc.vm_fault); @@ -599,8 +822,6 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; - adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; - /* Set the internal MC address mask * This is the max address of the GPU's * internal address space. 
@@ -660,7 +881,7 @@ static int gmc_v9_0_sw_init(void *handle) } /** - * gmc_v8_0_gart_fini - vm fini callback + * gmc_v9_0_gart_fini - vm fini callback * * @adev: amdgpu_device pointer * @@ -676,9 +897,9 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); gmc_v9_0_gart_fini(adev); - amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); return 0; @@ -686,19 +907,20 @@ static int gmc_v9_0_sw_fini(void *handle) static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) { + switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_mmhub_1_0_0, - (const u32)ARRAY_SIZE(golden_settings_mmhub_1_0_0)); - amdgpu_program_register_sequence(adev, + ARRAY_SIZE(golden_settings_mmhub_1_0_0)); + soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, - (const u32)ARRAY_SIZE(golden_settings_athub_1_0_0)); + ARRAY_SIZE(golden_settings_athub_1_0_0)); break; case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, - (const u32)ARRAY_SIZE(golden_settings_athub_1_0_0)); + ARRAY_SIZE(golden_settings_athub_1_0_0)); break; default: break; @@ -716,9 +938,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) bool value; u32 tmp; - amdgpu_program_register_sequence(adev, - golden_settings_vega10_hdp, - (const u32)ARRAY_SIZE(golden_settings_vega10_hdp)); + amdgpu_device_program_register_sequence(adev, + golden_settings_vega10_hdp, + ARRAY_SIZE(golden_settings_vega10_hdp)); if (adev->gart.robj == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -751,10 +973,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); /* After HDP is initialized, flush HDP.*/ - if (adev->flags & AMD_IS_APU) - nbio_v7_0_hdp_flush(adev); - else - nbio_v6_1_hdp_flush(adev); + adev->nbio_funcs->hdp_flush(adev); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; @@ -839,7 +1058,7 @@ static int gmc_v9_0_resume(void *handle) if (r) return r; - amdgpu_vm_reset_all_ids(adev); + amdgpu_vmid_reset_all(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index bd592cb39f37..c4e4be3dd31d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -259,7 +259,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index f33d1ffdb20b..d9e9e52a0def 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -1682,8 +1682,8 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate) if (gate) { /* stop the UVD block */ - ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_GATE); + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_GATE); kv_update_uvd_dpm(adev, gate); if (pi->caps_uvd_pg) /* power off the UVD block */ @@ -1695,8 +1695,8 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate) /* re-init the UVD block */ kv_update_uvd_dpm(adev, gate); - ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD, + AMD_PG_STATE_UNGATE); } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index cc21c4bdec27..ffd5b7ee49c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -23,14 +23,12 @@ #include "amdgpu.h" #include "mmhub_v1_0.h" -#include "vega10/soc15ip.h" -#include "vega10/MMHUB/mmhub_1_0_offset.h" -#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" -#include "vega10/MMHUB/mmhub_1_0_default.h" -#include "vega10/ATHUB/athub_1_0_offset.h" -#include "vega10/ATHUB/athub_1_0_sh_mask.h" -#include "vega10/ATHUB/athub_1_0_default.h" -#include "vega10/vega10_enum.h" +#include "mmhub/mmhub_1_0_offset.h" +#include "mmhub/mmhub_1_0_sh_mask.h" +#include "mmhub/mmhub_1_0_default.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" #include "soc15_common.h" @@ -157,10 +155,15 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); - tmp = mmVM_L2_CNTL3_DEFAULT; - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6); - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); + if (adev->mc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } tmp = mmVM_L2_CNTL4_DEFAULT; tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); @@ -198,32 +201,40 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - int i; + unsigned num_level, block_size; uint32_t tmp; + int i; + + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->mc.translate_further) + num_level -= 1; + else + block_size -= 9; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + num_level); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - ENABLE_CONTEXT, 1); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_DEPTH, adev->vm_manager.num_level); - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = 
REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + PAGE_TABLE_BLOCK_SIZE, + block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index b4906d2f30d3..271452d3999a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -22,11 +22,10 @@ */ #include "amdgpu.h" -#include "vega10/soc15ip.h" -#include "vega10/NBIO/nbio_6_1_offset.h" -#include "vega10/NBIO/nbio_6_1_sh_mask.h" -#include "vega10/GC/gc_9_0_offset.h" -#include "vega10/GC/gc_9_0_sh_mask.h" +#include "nbio/nbio_6_1_offset.h" +#include "nbio/nbio_6_1_sh_mask.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" #include "soc15.h" #include "vega10_ih.h" #include "soc15_common.h" @@ -254,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_sriov_gpu_reset(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL, false); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -278,13 +277,21 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, int r; /* trigger gpu-reset by hypervisor only if TDR disabled */ - if (amdgpu_lockup_timeout == 0) { + if (!amdgpu_gpu_recovery) { /* see what event we get */ r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); - /* only handle FLR_NOTIFY now */ - if (!r) - schedule_work(&adev->virt.flr_work); + /* sometimes injection of the interrupt into the VM is delayed; in that + * case the IDH_FLR_NOTIFICATION is overwritten by the VF FLR from the + * GIM side, so the receive above may fail and we should schedule the + * flr_work anyway + */ + if (r) { + DRM_ERROR("FLR_NOTIFICATION is missed\n"); + xgpu_ai_mailbox_send_ack(adev); + } + + schedule_work(&adev->virt.flr_work); } return 0; @@ -353,5 +360,6 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, .reset_gpu = xgpu_ai_request_reset, + .wait_reset = NULL, .trans_msg = xgpu_ai_mailbox_trans_msg, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index c25a831f94ec..9fc1c37344ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -279,32 +279,32 @@ void xgpu_vi_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - xgpu_fiji_mgcg_cgcg_init, - (const u32)ARRAY_SIZE( - 
xgpu_fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - xgpu_fiji_golden_settings_a10, - (const u32)ARRAY_SIZE( - xgpu_fiji_golden_settings_a10)); - amdgpu_program_register_sequence(adev, - xgpu_fiji_golden_common_all, - (const u32)ARRAY_SIZE( - xgpu_fiji_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + xgpu_fiji_mgcg_cgcg_init, + ARRAY_SIZE( + xgpu_fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + xgpu_fiji_golden_settings_a10, + ARRAY_SIZE( + xgpu_fiji_golden_settings_a10)); + amdgpu_device_program_register_sequence(adev, + xgpu_fiji_golden_common_all, + ARRAY_SIZE( + xgpu_fiji_golden_common_all)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - xgpu_tonga_mgcg_cgcg_init, - (const u32)ARRAY_SIZE( - xgpu_tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - xgpu_tonga_golden_settings_a11, - (const u32)ARRAY_SIZE( - xgpu_tonga_golden_settings_a11)); - amdgpu_program_register_sequence(adev, - xgpu_tonga_golden_common_all, - (const u32)ARRAY_SIZE( - xgpu_tonga_golden_common_all)); + amdgpu_device_program_register_sequence(adev, + xgpu_tonga_mgcg_cgcg_init, + ARRAY_SIZE( + xgpu_tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + xgpu_tonga_golden_settings_a11, + ARRAY_SIZE( + xgpu_tonga_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + xgpu_tonga_golden_common_all, + ARRAY_SIZE( + xgpu_tonga_golden_common_all)); break; default: BUG_ON("Doesn't support chip type.\n"); @@ -446,8 +446,10 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, request == IDH_REQ_GPU_FINI_ACCESS || request == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); - if (r) - pr_err("Doesn't get ack from pf, continue\n"); + if (r) { + pr_err("Doesn't get ack from pf, give up\n"); + return r; + } } return 0; @@ -458,6 +460,11 @@ static int xgpu_vi_request_reset(struct amdgpu_device *adev) return xgpu_vi_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); } +static int xgpu_vi_wait_reset_cmpl(struct amdgpu_device *adev) +{ + return xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL); +} + static int xgpu_vi_request_full_gpu_access(struct amdgpu_device *adev, bool init) { @@ -514,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_sriov_gpu_reset(adev, NULL); + amdgpu_device_gpu_recover(adev, NULL, false); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -538,7 +545,7 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev, int r; /* trigger gpu-reset by hypervisor only if TDR disabled */ - if (amdgpu_lockup_timeout == 0) { + if (!amdgpu_gpu_recovery) { /* see what event we get */ r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); @@ -613,5 +620,6 @@ const struct amdgpu_virt_ops xgpu_vi_virt_ops = { .req_full_gpu = xgpu_vi_request_full_gpu_access, .rel_full_gpu = xgpu_vi_release_full_gpu_access, .reset_gpu = xgpu_vi_request_reset, + .wait_reset = xgpu_vi_wait_reset_cmpl, .trans_msg = NULL, /* Does not need to trans VF errors to host. 
*/ }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 904a1bab9b9f..d4da663d5eb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -24,17 +24,16 @@ #include "amdgpu_atombios.h" #include "nbio_v6_1.h" -#include "vega10/soc15ip.h" -#include "vega10/NBIO/nbio_6_1_default.h" -#include "vega10/NBIO/nbio_6_1_offset.h" -#include "vega10/NBIO/nbio_6_1_sh_mask.h" -#include "vega10/vega10_enum.h" +#include "nbio/nbio_6_1_default.h" +#include "nbio/nbio_6_1_offset.h" +#include "nbio/nbio_6_1_sh_mask.h" +#include "vega10_enum.h" #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 #define smnPCIE_CONFIG_CNTL 0x11180044 -u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -44,19 +43,7 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) return tmp; } -u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx) -{ - return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); -} - -void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val) -{ - WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); -} - -void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) +static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, @@ -66,26 +53,23 @@ void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) +static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) { WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); } -u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) +static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE); } -static const u32 nbio_sdma_doorbell_range_reg[] = -{ - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE), - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) -}; - -void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, +static void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index) { - u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]); + u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -93,17 +77,18 @@ void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); - WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range); + WREG32(reg, doorbell_range); + } -void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) { WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 
1 : 0); } -void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) { u32 tmp = 0; @@ -122,8 +107,8 @@ void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, } -void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index) +static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) { u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); @@ -136,7 +121,7 @@ void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); } -void nbio_v6_1_ih_control(struct amdgpu_device *adev) +static void nbio_v6_1_ih_control(struct amdgpu_device *adev) { u32 interrupt_cntl; @@ -152,8 +137,8 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); } -void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t def, data; @@ -180,8 +165,8 @@ void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, WREG32_PCIE(smnCPM_CONTROL, data); } -void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, - bool enable) +static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) { uint32_t def, data; @@ -200,7 +185,8 @@ void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, WREG32_PCIE(smnPCIE_CNTL2, data); } -void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) +static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) { int data; @@ -215,9 +201,27 @@ void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) *flags |= AMD_CG_SUPPORT_BIF_LS; } -const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { - .hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ), - .hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE), +static u32 nbio_v6_1_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v6_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX); +} + +static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA); +} + +static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -232,12 +236,7 @@ const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK }; -const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data = { - .index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX), - .data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA), -}; - -void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) +static void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) { uint32_t reg; @@ 
-254,7 +253,7 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) } } -void nbio_v6_1_init_registers(struct amdgpu_device *adev) +static void nbio_v6_1_init_registers(struct amdgpu_device *adev) { uint32_t def, data; @@ -265,3 +264,25 @@ void nbio_v6_1_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); } + +const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { + .hdp_flush_reg = &nbio_v6_1_hdp_flush_reg, + .get_hdp_flush_req_offset = nbio_v6_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v6_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v6_1_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v6_1_get_pcie_data_offset, + .get_rev_id = nbio_v6_1_get_rev_id, + .mc_access_enable = nbio_v6_1_mc_access_enable, + .hdp_flush = nbio_v6_1_hdp_flush, + .get_memsize = nbio_v6_1_get_memsize, + .sdma_doorbell_range = nbio_v6_1_sdma_doorbell_range, + .enable_doorbell_aperture = nbio_v6_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v6_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v6_1_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v6_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v6_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v6_1_get_clockgating_state, + .ih_control = nbio_v6_1_ih_control, + .init_registers = nbio_v6_1_init_registers, + .detect_hw_virt = nbio_v6_1_detect_hw_virt, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h index 14ca8d45a46c..0743a6f016f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -26,30 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; -extern const struct nbio_pcie_index_data nbio_v6_1_pcie_index_data; -int nbio_v6_1_init(struct amdgpu_device *adev); -u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx); -void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val); -void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable); -void nbio_v6_1_hdp_flush(struct amdgpu_device *adev); -u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev); -void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index); -void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable); -void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, - bool enable); -void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); -void nbio_v6_1_ih_control(struct amdgpu_device *adev); -u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev); -void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable); -void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable); -void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags); -void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev); -void nbio_v6_1_init_registers(struct amdgpu_device *adev); +extern const struct amdgpu_nbio_funcs nbio_v6_1_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index f802b973410a..17a9131a4598 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -24,15 +24,17 @@ #include "amdgpu_atombios.h" #include 
"nbio_v7_0.h" -#include "vega10/soc15ip.h" -#include "raven1/NBIO/nbio_7_0_default.h" -#include "raven1/NBIO/nbio_7_0_offset.h" -#include "raven1/NBIO/nbio_7_0_sh_mask.h" -#include "vega10/vega10_enum.h" +#include "nbio/nbio_7_0_default.h" +#include "nbio/nbio_7_0_offset.h" +#include "nbio/nbio_7_0_sh_mask.h" +#include "vega10_enum.h" #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c -u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 + +static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); @@ -42,19 +44,7 @@ u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) return tmp; } -u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx) -{ - return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); -} - -void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val) -{ - WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); -} - -void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) +static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, @@ -63,26 +53,23 @@ void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) +static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) { WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); } -u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) +static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE); } -static const u32 nbio_sdma_doorbell_range_reg[] = +static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index) { - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE), - SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) -}; + u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); -void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index) -{ - u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]); + u32 doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -90,17 +77,23 @@ void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); - WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range); + WREG32(reg, doorbell_range); } -void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) +static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) { WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 
1 : 0); } -void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index) +static void nbio_v7_0_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + +} + +static void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) { u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); @@ -130,8 +123,8 @@ static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t o WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data); } -void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t def, data; @@ -169,7 +162,43 @@ void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data); } -void nbio_v7_0_ih_control(struct amdgpu_device *adev) +static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CNTL2); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_PCIE(smnPCIE_CNTL2, data); +} + +static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_PCIE(smnCPM_CONTROL); + if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_PCIE(smnPCIE_CNTL2); + if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +static void nbio_v7_0_ih_control(struct amdgpu_device *adev) { u32 interrupt_cntl; @@ -185,9 +214,27 @@ void nbio_v7_0_ih_control(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); } +static u32 nbio_v7_0_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v7_0_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v7_0_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); +} + +static u32 nbio_v7_0_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); +} + const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { - .hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ), - .hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE), .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, @@ -202,7 +249,35 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data = { - .index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), - .data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2) +static void nbio_v7_0_detect_hw_virt(struct amdgpu_device *adev) +{ + if (is_virtual_machine()) /* passthrough mode excludes sriov mode */ + adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; +}
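The nbio_v7_0 (and matching nbio_v6_1) conversion above turns every exported helper into a static function reached through a per-IP function-pointer table, so shared SOC15 code can call adev->nbio_funcs->... without caring which NBIO revision is fitted. A condensed sketch of that dispatch pattern; struct hw_funcs, the chip_a_* names, and the values are invented stand-ins rather than real amdgpu types:

#include <stdint.h>
#include <stdio.h>

/* invented stand-in for a per-IP ops table like amdgpu_nbio_funcs */
struct hw_funcs {
	uint32_t (*get_rev_id)(void);
	void (*ih_control)(void);
};

static uint32_t chip_a_get_rev_id(void)
{
	return 0x10;	/* fabricated revision */
}

static void chip_a_ih_control(void)
{
	puts("chip A: program interrupt handling");
}

static const struct hw_funcs chip_a_funcs = {
	.get_rev_id = chip_a_get_rev_id,
	.ih_control = chip_a_ih_control,
};

int main(void)
{
	/* common code binds one table at init, then stays version-agnostic */
	const struct hw_funcs *funcs = &chip_a_funcs;

	printf("rev id: 0x%x\n", (unsigned)funcs->get_rev_id());
	funcs->ih_control();
	return 0;
}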
+ +static void nbio_v7_0_init_registers(struct amdgpu_device *adev) +{ + +} + +const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { + .hdp_flush_reg = &nbio_v7_0_hdp_flush_reg, + .get_hdp_flush_req_offset = nbio_v7_0_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v7_0_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v7_0_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v7_0_get_pcie_data_offset, + .get_rev_id = nbio_v7_0_get_rev_id, + .mc_access_enable = nbio_v7_0_mc_access_enable, + .hdp_flush = nbio_v7_0_hdp_flush, + .get_memsize = nbio_v7_0_get_memsize, + .sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range, + .enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v7_0_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_0_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_0_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_0_get_clockgating_state, + .ih_control = nbio_v7_0_ih_control, + .init_registers = nbio_v7_0_init_registers, + .detect_hw_virt = nbio_v7_0_detect_hw_virt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h index df8fa90f40d7..508d549c5029 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -26,24 +26,6 @@ #include "soc15_common.h" -extern const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; -extern const struct nbio_pcie_index_data nbio_v7_0_pcie_index_data; -int nbio_v7_0_init(struct amdgpu_device *adev); -u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx); -void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, - uint32_t idx, uint32_t val); -void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable); -void nbio_v7_0_hdp_flush(struct amdgpu_device *adev); -u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev); -void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, - bool use_doorbell, int doorbell_index); -void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable); -void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index); -void nbio_v7_0_ih_control(struct amdgpu_device *adev); -u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev); -void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable); +extern const struct amdgpu_nbio_funcs nbio_v7_0_funcs; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 4e20d91d5d50..5a9fe24697f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -30,10 +30,9 @@ #include "soc15_common.h" #include "psp_v10_0.h" -#include "vega10/soc15ip.h" -#include "raven1/MP/mp_10_0_offset.h" -#include "raven1/GC/gc_9_1_offset.h" -#include "raven1/SDMA0/sdma0_4_1_offset.h" +#include "mp/mp_10_0_offset.h" +#include "gc/gc_9_1_offset.h" +#include "sdma0/sdma0_4_1_offset.h" MODULE_FIRMWARE("amdgpu/raven_asd.bin"); @@ -298,9 +297,10 @@ int psp_v10_0_cmd_submit(struct psp_context *psp, } static int -psp_v10_0_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, - unsigned int *sram_data_reg_offset, - enum AMDGPU_UCODE_ID ucode_id) +psp_v10_0_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, 
unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) { int ret = 0; @@ -395,7 +395,7 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp, uint32_t *ucode_mem = NULL; struct amdgpu_device *adev = psp->adev; - err = psp_v10_0_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, + err = psp_v10_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, &fw_sram_data_reg_offset, ucode_type); if (err) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index c7bcfe8e286c..19bd1934e63d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -31,12 +31,11 @@ #include "soc15_common.h" #include "psp_v3_1.h" -#include "vega10/soc15ip.h" -#include "vega10/MP/mp_9_0_offset.h" -#include "vega10/MP/mp_9_0_sh_mask.h" -#include "vega10/GC/gc_9_0_offset.h" -#include "vega10/SDMA0/sdma0_4_0_offset.h" -#include "vega10/NBIO/nbio_6_1_offset.h" +#include "mp/mp_9_0_offset.h" +#include "mp/mp_9_0_sh_mask.h" +#include "gc/gc_9_0_offset.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "nbio/nbio_6_1_offset.h" MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); @@ -410,9 +409,10 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, } static int -psp_v3_1_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, - unsigned int *sram_data_reg_offset, - enum AMDGPU_UCODE_ID ucode_id) +psp_v3_1_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) { int ret = 0; @@ -507,7 +507,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp, uint32_t *ucode_mem = NULL; struct amdgpu_device *adev = psp->adev; - err = psp_v3_1_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, + err = psp_v3_1_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, &fw_sram_data_reg_offset, ucode_type); if (err) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 67f375bfe452..d4787ad4d346 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -93,12 +93,12 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_iceland_a11, - (const u32)ARRAY_SIZE(golden_settings_iceland_a11)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_iceland_a11, + ARRAY_SIZE(golden_settings_iceland_a11)); break; default: break; @@ -246,15 +246,13 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); 
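In the sdma_v2_4_ring_emit_ib() hunk here, the vm_id parameter becomes vmid and the "& 0xf" mask moves into the macro argument, making it explicit that only a 4-bit VMID ever reaches the packet header. A toy sketch of what that header packing amounts to; the bit positions are assumed for illustration, and the authoritative layout lives in the generated SDMA packet headers:

#include <stdint.h>
#include <stdio.h>

/* assumed layout: opcode in bits 0-7, VMID in bits 16-19 */
#define SDMA_OP_INDIRECT		0x4
#define PKT_HEADER_OP(op)		((op) & 0xff)
#define PKT_INDIRECT_HEADER_VMID(v)	(((v) & 0xf) << 16)

int main(void)
{
	unsigned int vmid = 0x15;	/* only the low 4 bits may reach the packet */
	uint32_t hdr = PKT_HEADER_OP(SDMA_OP_INDIRECT) |
		       PKT_INDIRECT_HEADER_VMID(vmid);

	printf("header 0x%08x, vmid field %u\n",
	       (unsigned)hdr, (unsigned)((hdr >> 16) & 0xf));
	return 0;
}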
amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); @@ -600,7 +598,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -613,7 +611,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -633,13 +631,13 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -662,7 +660,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -704,7 +702,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); @@ -715,7 +713,7 @@ err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -861,14 +859,14 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VI). 
*/ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, pd_addr >> 12); @@ -876,7 +874,7 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for flush */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 6d06f8eb659f..521978c40537 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -192,47 +192,47 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_fiji_a10, - (const u32)ARRAY_SIZE(golden_settings_fiji_a10)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_fiji_a10, + ARRAY_SIZE(golden_settings_fiji_a10)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_tonga_a11, - (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + golden_settings_tonga_a11, + ARRAY_SIZE(golden_settings_tonga_a11)); break; case CHIP_POLARIS11: case CHIP_POLARIS12: - amdgpu_program_register_sequence(adev, - golden_settings_polaris11_a11, - (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris11_a11, + ARRAY_SIZE(golden_settings_polaris11_a11)); break; case CHIP_POLARIS10: - amdgpu_program_register_sequence(adev, - golden_settings_polaris10_a11, - (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_device_program_register_sequence(adev, + golden_settings_polaris10_a11, + ARRAY_SIZE(golden_settings_polaris10_a11)); break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - cz_golden_settings_a11, - (const u32)ARRAY_SIZE(cz_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_golden_settings_a11, + ARRAY_SIZE(cz_golden_settings_a11)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); - 
amdgpu_program_register_sequence(adev, - stoney_golden_settings_a11, - (const u32)ARRAY_SIZE(stoney_golden_settings_a11)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + stoney_golden_settings_a11, + ARRAY_SIZE(stoney_golden_settings_a11)); break; default: break; @@ -355,7 +355,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 wptr; - if (ring->use_doorbell) { + if (ring->use_doorbell || ring->use_pollmem) { /* XXX check if swapping is necessary on BE */ wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2; } else { @@ -380,10 +380,13 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; - /* XXX check if swapping is necessary on BE */ WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); + } else if (ring->use_pollmem) { + u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; + + WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; @@ -414,15 +417,13 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); @@ -718,10 +719,14 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i], upper_32_bits(wptr_gpu_addr)); wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]); - if (amdgpu_sriov_vf(adev)) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); + if (ring->use_pollmem) + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + ENABLE, 1); else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + ENABLE, 0); WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl); /* enable DMA RB */ @@ -860,7 +865,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -873,7 +878,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -893,13 +898,13 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring 
%d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -922,7 +927,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -964,7 +969,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); @@ -974,7 +979,7 @@ err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1120,14 +1125,14 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VI). */ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - if (vm_id < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) { + amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); + amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); } amdgpu_ring_write(ring, pd_addr >> 12); @@ -1135,7 +1140,7 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for flush */ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | @@ -1203,9 +1208,13 @@ static int sdma_v3_0_sw_init(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = (i == 0) ? - AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; + if (!amdgpu_sriov_vf(adev)) { + ring->use_doorbell = true; + ring->doorbell_index = (i == 0) ? 
+ AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; + } else { + ring->use_pollmem = true; + } sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 46009db3d195..e92fb372bc99 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -27,15 +27,14 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" -#include "vega10/soc15ip.h" -#include "vega10/SDMA0/sdma0_4_0_offset.h" -#include "vega10/SDMA0/sdma0_4_0_sh_mask.h" -#include "vega10/SDMA1/sdma1_4_0_offset.h" -#include "vega10/SDMA1/sdma1_4_0_sh_mask.h" -#include "vega10/MMHUB/mmhub_1_0_offset.h" -#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" -#include "vega10/HDP/hdp_4_0_offset.h" -#include "raven1/SDMA0/sdma0_4_1_default.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma0/sdma0_4_0_sh_mask.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "sdma1/sdma1_4_0_sh_mask.h" +#include "mmhub/mmhub_1_0_offset.h" +#include "mmhub/mmhub_1_0_sh_mask.h" +#include "hdp/hdp_4_0_offset.h" +#include "sdma0/sdma0_4_1_default.h" #include "soc15_common.h" #include "soc15.h" @@ -53,97 +52,85 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static const u32 golden_settings_sdma_4[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0x003ff006, 0x0003c000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CHICKEN_BITS), 0xfe931f07, 0x02831f07, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), 0xffffffff, 0x3f000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), 0x003ff000, 0x0003c000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL), 0x800f0100, 0x00000100, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0 +static const struct soc15_reg_golden golden_settings_sdma_4[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 
0xff000ff0, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003ff006, 0x0003c000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_POWER_CNTL, 0x003ff000, 0x0003c000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; -static const u32 golden_settings_sdma_vg10[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, - SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 +static const struct soc15_reg_golden golden_settings_sdma_vg10[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002) }; -static const u32 golden_settings_sdma_4_1[] = +static const struct soc15_reg_golden golden_settings_sdma_4_1[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831d07, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, - SOC15_REG_OFFSET(SDMA0, 0, 
mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0 + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0xfc3fffff, 0x40000051), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0) }; -static const u32 golden_settings_sdma_rv1[] = +static const struct soc15_reg_golden golden_settings_sdma_rv1[] = { - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002, - SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002 + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002) }; -static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) +static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, + u32 instance, u32 offset) { - u32 base = 0; - - switch (instance) { - case 0: - base = SDMA0_BASE.instance[0].segment[0]; - break; - case 1: - base = SDMA1_BASE.instance[0].segment[0]; - break; - default: - BUG(); - break; - } - - return base + internal_offset; + return ( 0 == instance ? (adev->reg_offset[SDMA0_HWIP][0][0] + offset) : + (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); } static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_4, - (const u32)ARRAY_SIZE(golden_settings_sdma_4)); - amdgpu_program_register_sequence(adev, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, golden_settings_sdma_vg10, - (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); + ARRAY_SIZE(golden_settings_sdma_vg10)); break; case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, + soc15_program_register_sequence(adev, golden_settings_sdma_4_1, - (const u32)ARRAY_SIZE(golden_settings_sdma_4_1)); - amdgpu_program_register_sequence(adev, + ARRAY_SIZE(golden_settings_sdma_4_1)); + soc15_program_register_sequence(adev, golden_settings_sdma_rv1, - (const u32)ARRAY_SIZE(golden_settings_sdma_rv1)); + ARRAY_SIZE(golden_settings_sdma_rv1)); break; default: break; @@ -265,8 +252,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) int me = (ring == &adev->sdma.instance[0].ring) ? 
0 : 1; wptr = &local_wptr; - lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; + lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; + highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", me, highbit, lowbit); @@ -315,8 +302,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } } @@ -343,15 +330,13 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) */ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { - u32 vmid = vm_id & 0xf; - /* IB packet must end on a 8 DW boundary */ sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | - SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); @@ -370,13 +355,9 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, */ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; u32 ref_and_mask = 0; - const struct nbio_hdp_flush_reg *nbio_hf_reg; - - if (ring->adev->flags & AMD_IS_APU) - nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; - else - nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; if (ring == &ring->adev->sdma.instance[0].ring) ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; @@ -386,8 +367,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_done_offset << 2); - amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_req_offset << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2); amdgpu_ring_write(ring, ref_and_mask); /* reference */ amdgpu_ring_write(ring, ref_and_mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | @@ -396,6 +377,8 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE)); @@ -460,12 +443,12 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); for (i = 0; i < 
adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); } sdma0->ready = false; @@ -522,18 +505,18 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), phase_quantum); } - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); } } @@ -557,9 +540,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 
0 : 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), f32_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); } } @@ -587,48 +570,48 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) ring = &adev->sdma.instance[i].ring; wb_offset = (ring->rptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); /* Set ring buffer size in dwords */ rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); /* set the wb address whether it's enabled or not */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); ring->wptr = 0; /* before programming wptr to a smaller value, need to set minor_ptr_update first */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for wptr */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); } - doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
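The sdma_v4_0 hunks above thread adev into sdma_v4_0_get_reg_offset() so the per-instance base comes from the device's reg_offset table rather than the removed SDMA*_BASE globals. A compact sketch of that lookup; device_ctx and the base values are fabricated for illustration, whereas the real table is filled in per ASIC at init:

#include <stdint.h>
#include <stdio.h>

enum { SDMA0_HWIP, SDMA1_HWIP, HWIP_MAX };

struct device_ctx {
	uint32_t reg_offset[HWIP_MAX];	/* fabricated per-instance bases */
};

/* same shape as the new helper: pick the instance base and add the
 * register's internal offset */
static uint32_t get_reg_offset(const struct device_ctx *dev,
			       uint32_t instance, uint32_t offset)
{
	return (instance == 0 ? dev->reg_offset[SDMA0_HWIP]
			      : dev->reg_offset[SDMA1_HWIP]) + offset;
}

int main(void)
{
	struct device_ctx dev = { .reg_offset = { 0x1260, 0x1860 } };

	printf("SDMA1 reg at 0x%x\n", (unsigned)get_reg_offset(&dev, 1, 0x80));
	return 0;
}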
if (ring->use_doorbell) { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); @@ -637,55 +620,53 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } else { doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); } - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - if (adev->flags & AMD_IS_APU) - nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); - else - nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index); if (amdgpu_sriov_vf(adev)) sdma_v4_0_ring_set_wptr(ring); /* set minor_ptr_update to 0 after wptr programmed */ - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), temp); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); if (!amdgpu_sriov_vf(adev)) { /* unhalt engine */ - temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); } /* setup the wptr shadow polling */ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); + wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); if (amdgpu_sriov_vf(adev)) wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); else wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); #endif /* enable DMA IBs */ -
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); ring->ready = true; @@ -816,12 +797,12 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); for (j = 0; j < fw_size; j++) - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); - WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); } return 0; @@ -886,7 +867,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -899,7 +880,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -919,13 +900,13 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -948,7 +929,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u32 tmp = 0; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -990,7 +971,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); @@ -1000,7 +981,7 @@ err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -1152,23 +1133,24 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * using sDMA (VEGA10). 
*/ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); /* flush TLB */ @@ -1183,8 +1165,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vm_id); /* reference */ - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, 1 << vmid); /* reference */ + amdgpu_ring_write(ring, 1 << vmid); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } @@ -1317,7 +1299,7 @@ static bool sdma_v4_0_is_idle(void *handle) u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { - u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG)); + u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) return false; @@ -1333,8 +1315,8 @@ static int sdma_v4_0_wait_for_idle(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG)); - sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG)); + sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) return 0; @@ -1358,8 +1340,8 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 
- sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) : - sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); + sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : + sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); sdma_cntl = RREG32(reg_offset); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 8284d5dbfc30..543101d5a5ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1390,65 +1390,65 @@ static void si_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_TAHITI: - amdgpu_program_register_sequence(adev, - tahiti_golden_registers, - (const u32)ARRAY_SIZE(tahiti_golden_registers)); - amdgpu_program_register_sequence(adev, - tahiti_golden_rlc_registers, - (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - tahiti_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - tahiti_golden_registers2, - (const u32)ARRAY_SIZE(tahiti_golden_registers2)); + amdgpu_device_program_register_sequence(adev, + tahiti_golden_registers, + ARRAY_SIZE(tahiti_golden_registers)); + amdgpu_device_program_register_sequence(adev, + tahiti_golden_rlc_registers, + ARRAY_SIZE(tahiti_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + tahiti_mgcg_cgcg_init, + ARRAY_SIZE(tahiti_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + tahiti_golden_registers2, + ARRAY_SIZE(tahiti_golden_registers2)); break; case CHIP_PITCAIRN: - amdgpu_program_register_sequence(adev, - pitcairn_golden_registers, - (const u32)ARRAY_SIZE(pitcairn_golden_registers)); - amdgpu_program_register_sequence(adev, - pitcairn_golden_rlc_registers, - (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - pitcairn_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + pitcairn_golden_registers, + ARRAY_SIZE(pitcairn_golden_registers)); + amdgpu_device_program_register_sequence(adev, + pitcairn_golden_rlc_registers, + ARRAY_SIZE(pitcairn_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + pitcairn_mgcg_cgcg_init, + ARRAY_SIZE(pitcairn_mgcg_cgcg_init)); break; case CHIP_VERDE: - amdgpu_program_register_sequence(adev, - verde_golden_registers, - (const u32)ARRAY_SIZE(verde_golden_registers)); - amdgpu_program_register_sequence(adev, - verde_golden_rlc_registers, - (const u32)ARRAY_SIZE(verde_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - verde_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - verde_pg_init, - (const u32)ARRAY_SIZE(verde_pg_init)); + amdgpu_device_program_register_sequence(adev, + verde_golden_registers, + ARRAY_SIZE(verde_golden_registers)); + amdgpu_device_program_register_sequence(adev, + verde_golden_rlc_registers, + ARRAY_SIZE(verde_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + verde_mgcg_cgcg_init, + ARRAY_SIZE(verde_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + verde_pg_init, + ARRAY_SIZE(verde_pg_init)); break; case CHIP_OLAND: - amdgpu_program_register_sequence(adev, - oland_golden_registers, - (const u32)ARRAY_SIZE(oland_golden_registers)); - amdgpu_program_register_sequence(adev, - oland_golden_rlc_registers, - (const u32)ARRAY_SIZE(oland_golden_rlc_registers)); - amdgpu_program_register_sequence(adev, - 
oland_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + oland_golden_registers, + ARRAY_SIZE(oland_golden_registers)); + amdgpu_device_program_register_sequence(adev, + oland_golden_rlc_registers, + ARRAY_SIZE(oland_golden_rlc_registers)); + amdgpu_device_program_register_sequence(adev, + oland_mgcg_cgcg_init, + ARRAY_SIZE(oland_mgcg_cgcg_init)); break; case CHIP_HAINAN: - amdgpu_program_register_sequence(adev, - hainan_golden_registers, - (const u32)ARRAY_SIZE(hainan_golden_registers)); - amdgpu_program_register_sequence(adev, - hainan_golden_registers2, - (const u32)ARRAY_SIZE(hainan_golden_registers2)); - amdgpu_program_register_sequence(adev, - hainan_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + hainan_golden_registers, + ARRAY_SIZE(hainan_golden_registers)); + amdgpu_device_program_register_sequence(adev, + hainan_golden_registers2, + ARRAY_SIZE(hainan_golden_registers2)); + amdgpu_device_program_register_sequence(adev, + hainan_mgcg_cgcg_init, + ARRAY_SIZE(hainan_mgcg_cgcg_init)); break; @@ -1959,42 +1959,42 @@ int si_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VERDE: case CHIP_TAHITI: case CHIP_PITCAIRN: - amdgpu_ip_block_add(adev, &si_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else - amdgpu_ip_block_add(adev, &dce_v6_0_ip_block); - amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_dma_ip_block); - /* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */ - /* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */ + amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); + /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ + /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; case CHIP_OLAND: - amdgpu_ip_block_add(adev, &si_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else - amdgpu_ip_block_add(adev, &dce_v6_4_ip_block); - amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_dma_ip_block); - /* amdgpu_ip_block_add(adev, &uvd_v3_1_ip_block); */ - /* amdgpu_ip_block_add(adev, &vce_v1_0_ip_block); */ + amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); + /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ + /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; case CHIP_HAINAN: - 
amdgpu_ip_block_add(adev, &si_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &si_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); - amdgpu_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_ip_block_add(adev, &si_dma_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 3fa2fbf8c9a1..9a29c1399091 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -61,14 +61,14 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ while ((lower_32_bits(ring->wptr) & 7) != 5) amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); - amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); + amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0)); amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); @@ -221,7 +221,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); return r; @@ -234,7 +234,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 4); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -252,13 +252,13 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", ring->idx, tmp); r = -EINVAL; } - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -281,7 +281,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; long r; - r = amdgpu_wb_get(adev, &index); + r = amdgpu_device_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; @@ -317,7 +317,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) } tmp = le32_to_cpu(adev->wb.wb[index]); if (tmp == 0xDEADBEEF) { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); @@ -328,7 +328,7 @@ err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err0: - amdgpu_wb_free(adev, index); + amdgpu_device_wb_free(adev, index); return r; } @@ -473,25 +473,25 @@ static void si_dma_ring_emit_pipeline_sync(struct 
amdgpu_ring *ring) * using sDMA (VI). */ static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); - if (vm_id < 8) - amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); + if (vmid < 8) + amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); else - amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8))); + amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8))); amdgpu_ring_write(ring, pd_addr >> 12); /* bits 0-7 are the VM contexts0-7 */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST)); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); /* wait for invalidate to complete */ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST); amdgpu_ring_write(ring, 0xff << 16); /* retry */ - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, 1 << vmid); /* mask */ amdgpu_ring_write(ring, 0); /* value */ amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 51fd0c9a20a5..ce675a7f179a 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3464,6 +3464,11 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6667)) { max_sclk = 75000; } + if ((adev->pdev->revision == 0xC3) || + (adev->pdev->device == 0x6665)) { + max_sclk = 60000; + max_mclk = 80000; + } } else if (adev->asic_type == CHIP_OLAND) { if ((adev->pdev->revision == 0xC7) || (adev->pdev->revision == 0x80) || @@ -5845,9 +5850,9 @@ static int si_set_mc_special_registers(struct amdgpu_device *adev, ((temp_reg & 0xffff0000)) | ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16); j++; + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) return -EINVAL; - temp_reg = RREG32(MC_PMG_CMD_MRS); table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS; table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS_LP; @@ -5859,18 +5864,16 @@ static int si_set_mc_special_registers(struct amdgpu_device *adev, table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { + if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD; table->mc_reg_address[j].s0 = MC_PMG_AUTO_CMD; for (k = 0; k < table->num_entries; k++) table->mc_reg_table_entry[k].mc_data[j] = (table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16; j++; - if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; } break; case MC_SEQ_RESERVE_M: @@ -5882,8 +5885,6 @@ static int si_set_mc_special_registers(struct amdgpu_device *adev, (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); j++; - if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index d2c6b80309c8..60dad63098a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -146,7 +146,7 @@ static void si_ih_decode_iv(struct amdgpu_device 
*adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; adev->irq.ih.rptr += 16; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4e67fe1e7955..a04a033f57de 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -34,18 +34,17 @@ #include "atom.h" #include "amd_pcie.h" -#include "vega10/soc15ip.h" -#include "vega10/UVD/uvd_7_0_offset.h" -#include "vega10/GC/gc_9_0_offset.h" -#include "vega10/GC/gc_9_0_sh_mask.h" -#include "vega10/SDMA0/sdma0_4_0_offset.h" -#include "vega10/SDMA1/sdma1_4_0_offset.h" -#include "vega10/HDP/hdp_4_0_offset.h" -#include "vega10/HDP/hdp_4_0_sh_mask.h" -#include "vega10/MP/mp_9_0_offset.h" -#include "vega10/MP/mp_9_0_sh_mask.h" -#include "vega10/SMUIO/smuio_9_0_offset.h" -#include "vega10/SMUIO/smuio_9_0_sh_mask.h" +#include "uvd/uvd_7_0_offset.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "hdp/hdp_4_0_offset.h" +#include "hdp/hdp_4_0_sh_mask.h" +#include "mp/mp_9_0_offset.h" +#include "mp/mp_9_0_sh_mask.h" +#include "smuio/smuio_9_0_offset.h" +#include "smuio/smuio_9_0_sh_mask.h" #include "soc15.h" #include "soc15_common.h" @@ -101,15 +100,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; u32 r; - const struct nbio_pcie_index_data *nbio_pcie_id; - - if (adev->flags & AMD_IS_APU) - nbio_pcie_id = &nbio_v7_0_pcie_index_data; - else - nbio_pcie_id = &nbio_v6_1_pcie_index_data; - - address = nbio_pcie_id->index_offset; - data = nbio_pcie_id->data_offset; + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -122,15 +114,9 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) { unsigned long flags, address, data; - const struct nbio_pcie_index_data *nbio_pcie_id; - if (adev->flags & AMD_IS_APU) - nbio_pcie_id = &nbio_v7_0_pcie_index_data; - else - nbio_pcie_id = &nbio_v6_1_pcie_index_data; - - address = nbio_pcie_id->index_offset; - data = nbio_pcie_id->data_offset; + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); spin_lock_irqsave(&adev->pcie_idx_lock, flags); WREG32(address, reg); @@ -242,41 +228,9 @@ static void soc15_se_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) - return nbio_v7_0_get_memsize(adev); - else - return nbio_v6_1_get_memsize(adev); + return adev->nbio_funcs->get_memsize(adev); } -static const u32 vega10_golden_init[] = -{ -}; - -static const u32 raven_golden_init[] = -{ -}; - -static void soc15_init_golden_registers(struct amdgpu_device *adev) -{ - /* Some of the registers might be dependent on GRBM_GFX_INDEX */ - mutex_lock(&adev->grbm_idx_mutex); - - switch (adev->asic_type) { - case CHIP_VEGA10: - amdgpu_program_register_sequence(adev, - vega10_golden_init, - (const u32)ARRAY_SIZE(vega10_golden_init)); - break; - case CHIP_RAVEN: - amdgpu_program_register_sequence(adev, - raven_golden_init, - (const u32)ARRAY_SIZE(raven_golden_init)); - break; - default: - break; - } - mutex_unlock(&adev->grbm_idx_mutex); 
-} static u32 soc15_get_xclk(struct amdgpu_device *adev) { return adev->clock.spll.reference_freq; @@ -332,25 +286,34 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, return true; } -static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = { - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)}, - { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG)}, - { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STAT)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)}, - { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)}, +struct soc15_allowed_register_entry { + uint32_t hwip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + bool grbm_indexed; +}; + + +static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE0)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE1)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE2)}, + { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS_SE3)}, + { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_STATUS_REG)}, + { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_STATUS_REG)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT2)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_STALLED_STAT3)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)}, }; static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, @@ -377,12 +340,9 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, if (indexed) { return soc15_read_indexed_register(adev, se_num, sh_num, reg_offset); } else { - switch (reg_offset) { - case SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG): + if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)) return adev->gfx.config.gb_addr_config; - default: - return RREG32(reg_offset); - } + return RREG32(reg_offset); } } @@ -390,10 +350,13 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 reg_offset, u32 *value) { uint32_t i; + struct soc15_allowed_register_entry *en; *value = 0; for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { - if (reg_offset != soc15_allowed_read_registers[i].reg_offset) + en = &soc15_allowed_read_registers[i]; + if (reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + + en->reg_offset)) continue; *value = soc15_get_register_value(adev, @@ -404,6 +367,43 @@ static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, return -EINVAL; } + +/** + * soc15_program_register_sequence - program an array of registers. 
+ * + * @adev: amdgpu_device pointer + * @regs: pointer to the register array + * @array_size: size of the register array + * + * Programs an array of registers, applying each entry's AND and OR masks. + * This is a helper for setting golden registers. + */ + +void soc15_program_register_sequence(struct amdgpu_device *adev, + const struct soc15_reg_golden *regs, + const u32 array_size) +{ + const struct soc15_reg_golden *entry; + u32 tmp, reg; + int i; + + for (i = 0; i < array_size; ++i) { + entry = &regs[i]; + reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg; + + if (entry->and_mask == 0xffffffff) { + tmp = entry->or_mask; + } else { + tmp = RREG32(reg); + tmp &= ~(entry->and_mask); + tmp |= entry->or_mask; + } + WREG32(reg, tmp); + } + +} + + static int soc15_asic_reset(struct amdgpu_device *adev) { u32 i; @@ -428,9 +428,8 @@ static int soc15_asic_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - u32 memsize = (adev->flags & AMD_IS_APU) ? - nbio_v7_0_get_memsize(adev) : - nbio_v6_1_get_memsize(adev); + u32 memsize = adev->nbio_funcs->get_memsize(adev); + if (memsize != 0xffffffff) break; udelay(1); @@ -495,14 +494,10 @@ static void soc15_program_aspm(struct amdgpu_device *adev) } static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, - bool enable) + bool enable) { - if (adev->flags & AMD_IS_APU) { - nbio_v7_0_enable_doorbell_aperture(adev, enable); - } else { - nbio_v6_1_enable_doorbell_aperture(adev, enable); - nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); - } + adev->nbio_funcs->enable_doorbell_aperture(adev, enable); + adev->nbio_funcs->enable_doorbell_selfring_aperture(adev, enable); } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -516,50 +511,65 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block = int soc15_set_ip_blocks(struct amdgpu_device *adev) { - nbio_v6_1_detect_hw_virt(adev); + /* Set IP register base before any HW register access */ + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_RAVEN: + vega10_reg_base_init(adev); + break; + default: + return -EINVAL; + } + + if (adev->flags & AMD_IS_APU) + adev->nbio_funcs = &nbio_v7_0_funcs; + else + adev->nbio_funcs = &nbio_v6_1_funcs; + + adev->nbio_funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) adev->virt.ops = &xgpu_ai_virt_ops; switch (adev->asic_type) { case CHIP_VEGA10: - amdgpu_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) - amdgpu_ip_block_add(adev, &psp_v3_1_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); if (!amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #else # warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." 
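As a side note on the soc15_program_register_sequence() helper added above: a minimal caller-side sketch, assuming the SOC15_REG_GOLDEN_VALUE() macro from the soc15.h hunk further below and GC 9.0 register names; the masks and values here are illustrative placeholders, not settings taken from this patch.

static const struct soc15_reg_golden golden_settings_example[] = {
	/* and_mask selects the bits to clear before or_mask is OR'd in */
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	/* an and_mask of 0xffffffff skips the read-modify-write and
	 * writes or_mask verbatim
	 */
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0xffffffff, 0x00000001),
};

soc15_program_register_sequence(adev, golden_settings_example,
				ARRAY_SIZE(golden_settings_example));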
#endif - amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); - amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block); break; case CHIP_RAVEN: - amdgpu_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); - amdgpu_ip_block_add(adev, &vega10_ih_ip_block); - amdgpu_ip_block_add(adev, &psp_v10_0_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #else # warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif - amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; default: return -EINVAL; @@ -570,10 +580,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) { - if (adev->flags & AMD_IS_APU) - return nbio_v7_0_get_rev_id(adev); - else - return nbio_v6_1_get_rev_id(adev); + return adev->nbio_funcs->get_rev_id(adev); } static const struct amdgpu_asic_funcs soc15_asic_funcs = @@ -609,8 +616,8 @@ static int soc15_common_early_init(void *handle) adev->asic_funcs = &soc15_asic_funcs; - if (amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && - (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && + (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) psp_enabled = true; adev->rev_id = soc15_get_rev_id(adev); @@ -659,8 +666,8 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_MMHUB; + adev->pg_flags = AMD_PG_SUPPORT_SDMA; + adev->external_rev_id = 0x1; break; default: @@ -675,7 +682,7 @@ static int soc15_common_early_init(void *handle) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_get_pcie_info(adev); + amdgpu_device_get_pcie_info(adev); return 0; } @@ -709,15 +716,12 @@ static int soc15_common_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* move the golden regs per IP block */ - soc15_init_golden_registers(adev); /* enable pcie gen2/3 link */ soc15_pcie_gen3_enable(adev); /* enable aspm */ soc15_program_aspm(adev); /* setup nbio registers */ - if (!(adev->flags & AMD_IS_APU)) - nbio_v6_1_init_registers(adev); + adev->nbio_funcs->init_registers(adev); /* enable the doorbell aperture */ 
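For orientation, the adev->nbio_funcs indirection that these soc15.c hunks switch to is a per-generation callback table; the sketch below is inferred from the call sites in this diff (member list abbreviated and not necessarily the complete structure as defined elsewhere in the series).

struct amdgpu_nbio_funcs {
	u32  (*get_memsize)(struct amdgpu_device *adev);
	u32  (*get_rev_id)(struct amdgpu_device *adev);
	u32  (*get_pcie_index_offset)(struct amdgpu_device *adev);
	u32  (*get_pcie_data_offset)(struct amdgpu_device *adev);
	void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
				    bool use_doorbell, int doorbell_index);
	void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
					 bool enable);
	void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
						  bool enable);
	void (*init_registers)(struct amdgpu_device *adev);
	void (*detect_hw_virt)(struct amdgpu_device *adev);
	/* clock-gating update/query callbacks elided */
};

soc15_set_ip_blocks() selects &nbio_v7_0_funcs (APU) or &nbio_v6_1_funcs (dGPU) once, so the AMD_IS_APU branches at each individual call site disappear.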
soc15_enable_doorbell_aperture(adev, true); @@ -878,9 +882,9 @@ static int soc15_common_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: - nbio_v6_1_update_medium_grain_clock_gating(adev, + adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - nbio_v6_1_update_medium_grain_light_sleep(adev, + adev->nbio_funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -894,9 +898,9 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: - nbio_v7_0_update_medium_grain_clock_gating(adev, + adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - nbio_v6_1_update_medium_grain_light_sleep(adev, + adev->nbio_funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); soc15_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); @@ -921,7 +925,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - nbio_v6_1_get_clockgating_state(adev, flags); + adev->nbio_funcs->get_clockgating_state(adev, flags); /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index acb3cdb119f2..26b3feac5d06 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -29,8 +29,28 @@ extern const struct amd_ip_funcs soc15_common_ip_funcs; +struct soc15_reg_golden { + u32 hwip; + u32 instance; + u32 segment; + u32 reg; + u32 and_mask; + u32 or_mask; +}; + +#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg + +#define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \ + { ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask } + void soc15_grbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int soc15_set_ip_blocks(struct amdgpu_device *adev); +void soc15_program_register_sequence(struct amdgpu_device *adev, + const struct soc15_reg_golden *registers, + const u32 array_size); + +int vega10_reg_base_init(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 7a8e4e28abb2..def865067edd 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -24,72 +24,28 @@ #ifndef __SOC15_COMMON_H__ #define __SOC15_COMMON_H__ -struct nbio_hdp_flush_reg { - u32 hdp_flush_req_offset; - u32 hdp_flush_done_offset; - u32 ref_and_mask_cp0; - u32 ref_and_mask_cp1; - u32 ref_and_mask_cp2; - u32 ref_and_mask_cp3; - u32 ref_and_mask_cp4; - u32 ref_and_mask_cp5; - u32 ref_and_mask_cp6; - u32 ref_and_mask_cp7; - u32 ref_and_mask_cp8; - u32 ref_and_mask_cp9; - u32 ref_and_mask_sdma0; - u32 ref_and_mask_sdma1; -}; - -struct nbio_pcie_index_data { - u32 index_offset; - u32 data_offset; -}; - /* Register Access Macros */ -#define SOC15_REG_OFFSET(ip, inst, reg) (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? 
ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))) +#define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define WREG32_FIELD15(ip, idx, reg, field, val) \ - WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + WREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ + (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \ + & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) #define RREG32_SOC15(ip, inst, reg) \ - RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg)))))) + RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ - RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset) + RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset) #define WREG32_SOC15(ip, inst, reg, value) \ - WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))), value) + WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ - WREG32_NO_KIQ( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))), value) + WREG32_NO_KIQ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value) #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ - WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ - (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ - (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ - (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ - (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset, value) + WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index aa4e320e31f8..5995ffc183de 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -270,7 +270,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; - entry->vm_id = (dw[2] >> 8) & 0xff; + entry->vmid = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 8ab0f78794a5..8ab10c220910 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -521,7 +521,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", @@ -541,7 +541,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); @@ -563,7 +563,7 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) /* programm the VCPU memory controller bits 0-27 */ addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; - size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4) >> 3; + size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); WREG32(mmUVD_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index bb6d46e168a3..c1fe30cdba32 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -258,7 +258,7 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) upper_32_bits(adev->uvd.gpu_addr)); offset = AMDGPU_UVD_FIRMWARE_OFFSET; - size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); + size = AMDGPU_UVD_FIRMWARE_SIZE(adev); WREG32(mmUVD_VCPU_CACHE_OFFSET0, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE0, size); @@ -536,7 +536,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", @@ -556,7 +556,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 920910ac8663..b2bfedaf57f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -37,6 +37,9 @@ #include "gmc/gmc_8_1_d.h" #include "vi.h" +/* Polaris10/11/12 firmware version */ +#define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8)) + static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v6_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -58,7 +61,9 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, */ static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev) { - return ((adev->asic_type >= CHIP_POLARIS10) && (adev->asic_type <= CHIP_POLARIS12)); + return ((adev->asic_type >= CHIP_POLARIS10) && + (adev->asic_type <= CHIP_POLARIS12) && + (!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16)); } /** @@ -184,7 +189,7 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test 
on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed\n", @@ -360,7 +365,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } error: @@ -411,12 +416,20 @@ static int uvd_v6_0_sw_init(void *handle) if (r) return r; - if (uvd_v6_0_enc_support(adev)) { - struct amd_sched_rq *rq; + if (!uvd_v6_0_enc_support(adev)) { + for (i = 0; i < adev->uvd.num_enc_rings; ++i) + adev->uvd.ring_enc[i].funcs = NULL; + + adev->uvd.irq.num_types = 1; + adev->uvd.num_enc_rings = 0; + + DRM_INFO("UVD ENC is disabled\n"); + } else { + struct drm_sched_rq *rq; ring = &adev->uvd.ring_enc[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, + rq, amdgpu_sched_jobs, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; @@ -456,7 +469,7 @@ static int uvd_v6_0_sw_fini(void *handle) return r; if (uvd_v6_0_enc_support(adev)) { - amd_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); + drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); for (i = 0; i < adev->uvd.num_enc_rings; ++i) amdgpu_ring_fini(&adev->uvd.ring_enc[i]); @@ -603,7 +616,7 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) upper_32_bits(adev->uvd.gpu_addr)); offset = AMDGPU_UVD_FIRMWARE_OFFSET; - size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); + size = AMDGPU_UVD_FIRMWARE_SIZE(adev); WREG32(mmUVD_VCPU_CACHE_OFFSET0, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE0, size); @@ -1008,7 +1021,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", @@ -1028,10 +1041,10 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0)); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -1050,24 +1063,24 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc ring commands to execute the indirect buffer */ static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); } static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { uint32_t reg; - if (vm_id < 8) - reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id; + if (vmid < 8) + reg = 
mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; else - reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8; + reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8; amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); amdgpu_ring_write(ring, reg << 2); @@ -1079,7 +1092,7 @@ static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); amdgpu_ring_write(ring, 0x8); @@ -1088,7 +1101,7 @@ static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0)); - amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, 1 << vmid); /* mask */ amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); amdgpu_ring_write(ring, 0xC); } @@ -1127,14 +1140,14 @@ static void uvd_v6_0_enc_ring_insert_end(struct amdgpu_ring *ring) } static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, pd_addr >> 12); amdgpu_ring_write(ring, HEVC_ENC_CMD_FLUSH_TLB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); } static bool uvd_v6_0_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 6634545060fd..6b95f4f344b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -29,16 +29,15 @@ #include "soc15_common.h" #include "mmsch_v1_0.h" -#include "vega10/soc15ip.h" -#include "vega10/UVD/uvd_7_0_offset.h" -#include "vega10/UVD/uvd_7_0_sh_mask.h" -#include "vega10/VCE/vce_4_0_offset.h" -#include "vega10/VCE/vce_4_0_default.h" -#include "vega10/VCE/vce_4_0_sh_mask.h" -#include "vega10/NBIF/nbif_6_1_offset.h" -#include "vega10/HDP/hdp_4_0_offset.h" -#include "vega10/MMHUB/mmhub_1_0_offset.h" -#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" +#include "uvd/uvd_7_0_offset.h" +#include "uvd/uvd_7_0_sh_mask.h" +#include "vce/vce_4_0_offset.h" +#include "vce/vce_4_0_default.h" +#include "vce/vce_4_0_sh_mask.h" +#include "nbif/nbif_6_1_offset.h" +#include "hdp/hdp_4_0_offset.h" +#include "mmhub/mmhub_1_0_offset.h" +#include "mmhub/mmhub_1_0_sh_mask.h" static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); @@ -184,7 +183,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed\n", @@ -359,7 +358,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } error: @@ -385,7 +384,7 @@ static int uvd_v7_0_early_init(void *handle) static int uvd_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; - struct 
amd_sched_rq *rq; + struct drm_sched_rq *rq; int i, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -416,9 +415,9 @@ static int uvd_v7_0_sw_init(void *handle) } ring = &adev->uvd.ring_enc[0]; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; - r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, - rq, amdgpu_sched_jobs); + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, + rq, amdgpu_sched_jobs, NULL); if (r) { DRM_ERROR("Failed setting up UVD ENC run queue.\n"); return r; @@ -472,7 +471,7 @@ static int uvd_v7_0_sw_fini(void *handle) if (r) return r; - amd_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); + drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); for (i = 0; i < adev->uvd.num_enc_rings; ++i) amdgpu_ring_fini(&adev->uvd.ring_enc[i]); @@ -616,7 +615,7 @@ static int uvd_v7_0_resume(void *handle) */ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); + uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); uint32_t offset; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { @@ -1086,6 +1085,8 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev) static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + struct amdgpu_device *adev = ring->adev; + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, @@ -1123,6 +1124,7 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, HEVC_ENC_CMD_FENCE); @@ -1141,6 +1143,8 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, */ static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(NBIF, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0)); amdgpu_ring_write(ring, 0); @@ -1155,6 +1159,8 @@ static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) */ static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); amdgpu_ring_write(ring, 1); } @@ -1192,7 +1198,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", @@ -1212,11 +1218,13 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) */ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); @@ -1238,10 +1246,10 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc ring commands to execute the indirect buffer */ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int 
vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); @@ -1250,6 +1258,8 @@ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -1264,6 +1274,8 @@ static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring, static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1, uint32_t mask) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -1279,25 +1291,26 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, } static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - uint32_t data0, data1, mask; + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; + uint32_t data0, data1, mask; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; data1 = upper_32_bits(pd_addr); uvd_v7_0_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); uvd_v7_0_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); @@ -1309,36 +1322,47 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vm_id; - mask = 1 << vm_id; + data1 = 1 << vmid; + mask = 1 << vmid; uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); } +static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + struct amdgpu_device *adev = ring->adev; + + for (i = 0; i < count; i++) + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + +} + static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, HEVC_ENC_CMD_END); } static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + 
amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); @@ -1350,8 +1374,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, 1 << vmid); } #if 0 @@ -1681,7 +1705,7 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = { static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, - .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), + .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = uvd_v7_0_ring_get_rptr, @@ -1700,7 +1724,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { .emit_hdp_invalidate = uvd_v7_0_ring_emit_hdp_invalidate, .test_ring = uvd_v7_0_ring_test_ring, .test_ib = amdgpu_uvd_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = uvd_v7_0_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_uvd_ring_begin_use, .end_use = amdgpu_uvd_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index cf81065e3c5a..a5355eb689f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -834,24 +834,24 @@ out: } static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); } static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, pd_addr >> 12); amdgpu_ring_write(ring, VCE_CMD_FLUSH_TLB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, VCE_CMD_END); } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 75745544600a..7cf2eef68cf2 100644..100755 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -32,12 +32,11 @@ #include "soc15_common.h" #include "mmsch_v1_0.h" -#include "vega10/soc15ip.h" -#include "vega10/VCE/vce_4_0_offset.h" -#include "vega10/VCE/vce_4_0_default.h" -#include 
"vega10/VCE/vce_4_0_sh_mask.h" -#include "vega10/MMHUB/mmhub_1_0_offset.h" -#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" +#include "vce/vce_4_0_offset.h" +#include "vce/vce_4_0_default.h" +#include "vce/vce_4_0_sh_mask.h" +#include "mmhub/mmhub_1_0_offset.h" +#include "mmhub/mmhub_1_0_sh_mask.h" #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 @@ -243,37 +242,49 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), - adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), - adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); } else { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), + (adev->vce.gpu_addr >> 40) & 0xff); + } + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), + (adev->vce.gpu_addr >> 40) & 0xff); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8); - } + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, + mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), + (adev->vce.gpu_addr >> 40) & 0xff); offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V4_0_FW_SIZE; MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), - offset & 0x7FFFFFFF); + offset & ~0x0f000000); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); - offset += size; + offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? 
offset + size : 0; size = VCE_V4_0_STACK_SIZE; MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), - offset & 0x7FFFFFFF); + (offset & ~0x0f000000) | (1 << 24)); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); offset += size; size = VCE_V4_0_DATA_SIZE; MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), - offset & 0x7FFFFFFF); + (offset & ~0x0f000000) | (2 << 24)); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); @@ -412,7 +423,7 @@ static int vce_v4_0_sw_init(void *handle) if (r) return r; - size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2; + size = VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) size += VCE_V4_0_FW_SIZE; @@ -927,10 +938,10 @@ static int vce_v4_0_set_powergating_state(void *handle, #endif static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB_VM); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); @@ -954,25 +965,26 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) } static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); @@ -984,8 +996,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, 1 << vmid); } static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 0450ac5ba6b6..b99e15c43e45 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -28,12 +28,11 @@ #include "soc15d.h" #include "soc15_common.h" 
-#include "vega10/soc15ip.h" -#include "raven1/VCN/vcn_1_0_offset.h" -#include "raven1/VCN/vcn_1_0_sh_mask.h" -#include "vega10/HDP/hdp_4_0_offset.h" -#include "raven1/MMHUB/mmhub_9_1_offset.h" -#include "raven1/MMHUB/mmhub_9_1_sh_mask.h" +#include "vcn/vcn_1_0_offset.h" +#include "vcn/vcn_1_0_sh_mask.h" +#include "hdp/hdp_4_0_offset.h" +#include "mmhub/mmhub_9_1_offset.h" +#include "mmhub/mmhub_9_1_sh_mask.h" static int vcn_v1_0_start(struct amdgpu_device *adev); static int vcn_v1_0_stop(struct amdgpu_device *adev); @@ -744,6 +743,8 @@ static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring) */ static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, 0); @@ -761,6 +762,8 @@ static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring) */ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1); @@ -777,6 +780,8 @@ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring) static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) { + struct amdgpu_device *adev = ring->adev; + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); amdgpu_ring_write(ring, @@ -812,6 +817,8 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 */ static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0)); amdgpu_ring_write(ring, 1); } @@ -826,11 +833,13 @@ static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) */ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vm_id, bool ctx_switch) + unsigned vmid, bool ctx_switch) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); @@ -846,6 +855,8 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -860,6 +871,8 @@ static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, uint32_t data0, uint32_t data1, uint32_t mask) { + struct amdgpu_device *adev = ring->adev; + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); amdgpu_ring_write(ring, data0); @@ -875,25 +888,26 @@ static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, } static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr) + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - uint32_t data0, data1, mask; + uint32_t req = 
ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; + uint32_t data0, data1, mask; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; - data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; data1 = upper_32_bits(pd_addr); vcn_v1_0_dec_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); vcn_v1_0_dec_vm_reg_write(ring, data0, data1); - data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; data1 = lower_32_bits(pd_addr); mask = 0xffffffff; vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); @@ -905,8 +919,8 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ data0 = (hub->vm_inv_eng0_ack + eng) << 2; - data1 = 1 << vm_id; - mask = 1 << vm_id; + data1 = 1 << vmid; + mask = 1 << vmid; vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); } @@ -997,38 +1011,39 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) * Write enc ring commands to execute the indirect buffer */ static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) + struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) { amdgpu_ring_write(ring, VCN_ENC_CMD_IB); - amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, ib->length_dw); } static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vm_id, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); + uint64_t flags = AMDGPU_PTE_VALID; unsigned eng = ring->vm_inv_eng; - pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); - pd_addr |= AMDGPU_PTE_VALID; + amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); + pd_addr |= flags; amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); amdgpu_ring_write(ring, upper_32_bits(pd_addr)); amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); amdgpu_ring_write(ring, 0xffffffff); amdgpu_ring_write(ring, lower_32_bits(pd_addr)); @@ -1040,8 +1055,8 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, /* wait for flush */ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vmid); + amdgpu_ring_write(ring, 1 << vmid); } static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1077,6 +1092,17 @@ static int 
vcn_v1_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + struct amdgpu_device *adev = ring->adev; + + for (i = 0; i < count; i++) + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0)); + +} + + static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .name = "vcn_v1_0", .early_init = vcn_v1_0_early_init, @@ -1100,7 +1126,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, - .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), + .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .vmhub = AMDGPU_MMHUB, .get_rptr = vcn_v1_0_dec_ring_get_rptr, @@ -1118,7 +1144,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate, .test_ring = amdgpu_vcn_dec_ring_test_ring, .test_ib = amdgpu_vcn_dec_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = vcn_v1_0_ring_insert_nop, .insert_start = vcn_v1_0_dec_ring_insert_start, .insert_end = vcn_v1_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 697325737ba8..b69ceafb7888 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -25,10 +25,8 @@ #include "amdgpu_ih.h" #include "soc15.h" - -#include "vega10/soc15ip.h" -#include "vega10/OSSSYS/osssys_4_0_offset.h" -#include "vega10/OSSSYS/osssys_4_0_sh_mask.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" #include "soc15_common.h" #include "vega10_ih.h" @@ -46,11 +44,11 @@ static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev); */ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) { - u32 ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), ih_rb_cntl); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); adev->irq.ih.enabled = true; } @@ -63,14 +61,14 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) */ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) { - u32 ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), ih_rb_cntl); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); /* set rptr, wptr to 0 */ - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR), 0); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR), 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); adev->irq.ih.enabled = false; adev->irq.ih.rptr = 0; } @@ -97,20 +95,17 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) /* disable irqs */ vega10_ih_disable_interrupts(adev); - if (adev->flags & AMD_IS_APU) - nbio_v7_0_ih_control(adev); - else - nbio_v6_1_ih_control(adev); + adev->nbio_funcs->ih_control(adev); - ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + ih_rb_cntl = 
RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ if (adev->irq.ih.use_bus_addr) { - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE), adev->irq.ih.rb_dma_addr >> 8); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI), ((u64)adev->irq.ih.rb_dma_addr >> 40) & 0xff); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.rb_dma_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, ((u64)adev->irq.ih.rb_dma_addr >> 40) & 0xff); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 1); } else { - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE), adev->irq.ih.gpu_addr >> 8); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI), (adev->irq.ih.gpu_addr >> 40) & 0xff); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, adev->irq.ih.gpu_addr >> 8); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (adev->irq.ih.gpu_addr >> 40) & 0xff); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 4); } rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4); @@ -126,21 +121,21 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) if (adev->irq.msi_enabled) ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, 1); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), ih_rb_cntl); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); /* set the writeback address whether it's enabled or not */ if (adev->irq.ih.use_bus_addr) wptr_off = adev->irq.ih.rb_dma_addr + (adev->irq.ih.wptr_offs * 4); else wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO), lower_32_bits(wptr_off)); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI), upper_32_bits(wptr_off) & 0xFF); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); /* set rptr, wptr to 0 */ - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR), 0); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR), 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); - ih_doorbell_rtpr = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR)); + ih_doorbell_rtpr = RREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR); if (adev->irq.ih.use_doorbell) { ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, OFFSET, adev->irq.ih.doorbell_index); @@ -150,20 +145,18 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, ENABLE, 0); } - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR), ih_doorbell_rtpr); - if (adev->flags & AMD_IS_APU) - nbio_v7_0_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); - else - nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); + WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, ih_doorbell_rtpr); + adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->irq.ih.doorbell_index); - tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL)); + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, CLIENT18_IS_STORM_CLIENT, 1); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL), tmp); + WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp); - tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_INT_FLOOD_CNTL)); + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL); tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 
1); - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_INT_FLOOD_CNTL), tmp); + WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp); pci_set_master(adev->pdev); @@ -334,8 +327,8 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, entry->client_id = dw[0] & 0xff; entry->src_id = (dw[0] >> 8) & 0xff; entry->ring_id = (dw[0] >> 16) & 0xff; - entry->vm_id = (dw[0] >> 24) & 0xf; - entry->vm_id_src = (dw[0] >> 31); + entry->vmid = (dw[0] >> 24) & 0xf; + entry->vmid_src = (dw[0] >> 31); entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); entry->timestamp_src = dw[2] >> 31; entry->pas_id = dw[3] & 0xffff; @@ -367,7 +360,7 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev) adev->wb.wb[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr; WDOORBELL32(adev->irq.ih.doorbell_index, adev->irq.ih.rptr); } else { - WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR), adev->irq.ih.rptr); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, adev->irq.ih.rptr); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c new file mode 100644 index 000000000000..b7bdd04793d6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -0,0 +1,56 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15ip.h" + +int vega10_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialize the blocks needed by our driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i])); + adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i])); + + } + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 3a4c2fa7e36d..da2b99c2d95f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -282,29 +282,29 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + ARRAY_SIZE(iceland_mgcg_cgcg_init)); break; case CHIP_FIJI: - amdgpu_program_register_sequence(adev, - fiji_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + fiji_mgcg_cgcg_init, + ARRAY_SIZE(fiji_mgcg_cgcg_init)); break; case CHIP_TONGA: - amdgpu_program_register_sequence(adev, - tonga_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + tonga_mgcg_cgcg_init, + ARRAY_SIZE(tonga_mgcg_cgcg_init)); break; case CHIP_CARRIZO: - amdgpu_program_register_sequence(adev, - cz_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + cz_mgcg_cgcg_init, + ARRAY_SIZE(cz_mgcg_cgcg_init)); break; case CHIP_STONEY: - amdgpu_program_register_sequence(adev, - stoney_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); + amdgpu_device_program_register_sequence(adev, + stoney_mgcg_cgcg_init, + ARRAY_SIZE(stoney_mgcg_cgcg_init)); break; case CHIP_POLARIS11: case CHIP_POLARIS10: @@ -449,14 +449,18 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, static void vi_detect_hw_virtualization(struct amdgpu_device *adev) { - uint32_t reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); - /* bit0: 0 means pf and 1 means vf */ /* bit31: 0 means disable IOV and 1 means enable */ - if (reg & 1) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; - - if (reg & 0x80000000) -
adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + uint32_t reg = 0; + + if (adev->asic_type == CHIP_TONGA || + adev->asic_type == CHIP_FIJI) { + reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); + /* bit0: 0 means pf and 1 means vf */ + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; + /* bit31: 0 means disable IOV and 1 means enable */ + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + } if (reg == 0) { if (is_virtual_machine()) /* passthrough mode excludes sr-iov mode */ @@ -667,7 +671,7 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) /* disable BM */ pci_clear_master(adev->pdev); /* reset */ - amdgpu_pci_config_reset(adev); + amdgpu_device_pci_config_reset(adev); udelay(100); @@ -891,8 +895,8 @@ static int vi_common_early_init(void *handle) adev->asic_funcs = &vi_asic_funcs; - if (amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) && - (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC))) + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) && + (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC))) smc_enabled = true; adev->rev_id = vi_get_rev_id(adev); @@ -1074,7 +1078,7 @@ static int vi_common_early_init(void *handle) /* vi use smc load by default */ adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - amdgpu_get_pcie_info(adev); + amdgpu_device_get_pcie_info(adev); return 0; } @@ -1487,115 +1491,115 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_TOPAZ: /* topaz has no DCE, UVD, VCE */ - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v7_4_ip_block); - amdgpu_ip_block_add(adev, &iceland_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block); + amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v2_4_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block); break; case CHIP_FIJI: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_5_ip_block); - amdgpu_ip_block_add(adev, &tonga_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block); + amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v10_1_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v10_1_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); if
(!amdgpu_sriov_vf(adev)) { - amdgpu_ip_block_add(adev, &uvd_v6_0_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); } break; case CHIP_TONGA: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block); - amdgpu_ip_block_add(adev, &tonga_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v10_0_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) { - amdgpu_ip_block_add(adev, &uvd_v5_0_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v5_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); } break; case CHIP_POLARIS11: case CHIP_POLARIS10: case CHIP_POLARIS12: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_1_ip_block); - amdgpu_ip_block_add(adev, &tonga_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); + amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v11_2_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_1_ip_block); - amdgpu_ip_block_add(adev, &uvd_v6_3_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_4_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); break; case CHIP_CARRIZO: - amdgpu_ip_block_add(adev, &vi_common_ip_block); - amdgpu_ip_block_add(adev, &gmc_v8_0_ip_block); - amdgpu_ip_block_add(adev, &cz_ih_ip_block); - amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &vi_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &cz_ih_ip_block); + amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); if (adev->enable_virtual_display) - amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if 
defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif else - amdgpu_ip_block_add(adev, &dce_v11_0_ip_block); - amdgpu_ip_block_add(adev, &gfx_v8_1_ip_block); - amdgpu_ip_block_add(adev, &sdma_v3_0_ip_block); - amdgpu_ip_block_add(adev, &uvd_v6_2_ip_block); - amdgpu_ip_block_add(adev, &vce_v3_4_ip_block); + amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); #if defined(CONFIG_DRM_AMD_ACP) - amdgpu_ip_block_add(adev, &acp_ip_block); + amdgpu_device_ip_block_add(adev, &acp_ip_block); #endif break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index dbf3703cbd1b..19ddd2312e00 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -27,6 +27,8 @@ #define SDMA1_REGISTER_OFFSET 0x200 /* not a register */ #define SDMA_MAX_INSTANCE 2 +#define KFD_VI_SDMA_QUEUE_OFFSET 0x80 /* not a register */ + /* crtc instance offsets */ #define CRTC0_REGISTER_OFFSET (0x1b9c - 0x1b9c) #define CRTC1_REGISTER_OFFSET (0x1d9c - 0x1b9c)
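
The uvd_v7_0_sw_init()/sw_fini() hunks above capture the whole pattern for moving an engine from the driver-private amd_sched_* API to the shared DRM scheduler. A minimal sketch of the new calling convention, assuming generic ring/entity names in place of the UVD-specific fields; the trailing NULL is the guilty-context pointer the new entity-init signature takes, and passing NULL simply opts out of guilty tracking:

	struct drm_sched_entity entity;	/* normally lives in a driver struct */
	struct drm_sched_rq *rq;
	int r;

	/* Entities are created against one run queue of the ring's scheduler;
	 * DRM_SCHED_PRIORITY_NORMAL replaces the old AMD_SCHED_PRIORITY_NORMAL. */
	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
	r = drm_sched_entity_init(&ring->sched, &entity, rq,
				  amdgpu_sched_jobs, NULL);
	if (r)
		return r;

	/* ... jobs are pushed through the entity while it lives ... */

	/* Teardown mirrors init, against the same scheduler. */
	drm_sched_entity_fini(&ring->sched, &entity);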
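
Many hunks above do nothing but add a local "struct amdgpu_device *adev = ring->adev;" to functions that only emit ring packets. The reason is the SOC15 register-offset rework visible in vega10_reg_init.c: register bases now live in the per-IP tables that vega10_reg_base_init() fills, and the lookup macro dereferences a variable literally named adev when it expands. A sketch of the helpers as they look after this series (paraphrased from soc15_common.h, so treat the exact text as approximate):

	/* The base comes from the table vega10_reg_base_init() populated;
	 * reg##_BASE_IDX picks the segment, reg is the offset inside it. */
	#define SOC15_REG_OFFSET(ip, inst, reg) \
		(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)

	/* The shorthands vega10_ih.c is converted to above. */
	#define RREG32_SOC15(ip, inst, reg) \
		RREG32(SOC15_REG_OFFSET(ip, inst, reg))
	#define WREG32_SOC15(ip, inst, reg, value) \
		WREG32(SOC15_REG_OFFSET(ip, inst, reg), (value))

Because the macro expands to an adev-> dereference, any function that builds PACKET0(SOC15_REG_OFFSET(...), ...) packets must have adev in scope, which is exactly what the added locals provide.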
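
The same rework explains the paired .nop/.insert_nop changes in uvd_v7_0_ring_vm_funcs and vcn_v1_0_dec_ring_vm_funcs: a static initializer can no longer evaluate SOC15_REG_OFFSET(), since the offset is now resolved at run time through adev->reg_offset, so the raw UVD_NO_OP offset (0x81ff, per the diff) is hard-coded in .nop, while run-time padding moves to a per-IP callback. A sketch of that callback shape, mirroring the uvd_v7_0_ring_insert_nop() added above but with a hypothetical name:

	static void my_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
	{
		struct amdgpu_device *adev = ring->adev; /* for SOC15_REG_OFFSET() */
		int i;

		/* A UVD NOP is a register write to UVD_NO_OP, one packet per slot. */
		for (i = 0; i < count; i++)
			amdgpu_ring_write(ring,
				PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
	}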
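
Finally, the vi_detect_hw_virtualization() hunk trades the open-coded bit tests (reg & 1, reg & 0x80000000) for REG_GET_FIELD() against the generated BIF_IOV_FUNC_IDENTIFIER field names, and only reads the register at all on Tonga and Fiji. For reference, REG_GET_FIELD() is the long-standing amdgpu mask-and-shift helper; its shape is roughly the following (quoted from memory, so approximate):

	/* Field names expand to the generated <REG>__<FIELD>_MASK/__SHIFT pair. */
	#define REG_GET_FIELD(value, reg, field) \
		(((value) & reg##__##field##_MASK) >> reg##__##field##__SHIFT)

so REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER) tests the same bit 0 as the old "reg & 1", but self-documents which field is being checked.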