diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
117 files changed, 23395 insertions, 1937 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2814aad81752..660786aba7d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o + amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -34,12 +34,13 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o # add GMC block amdgpu-y += \ gmc_v7_0.o \ - gmc_v8_0.o + gmc_v8_0.o \ + gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o # add IH block amdgpu-y += \ @@ -47,7 +48,13 @@ amdgpu-y += \ amdgpu_ih.o \ iceland_ih.o \ tonga_ih.o \ - cz_ih.o + cz_ih.o \ + vega10_ih.o + +# add PSP block +amdgpu-y += \ + amdgpu_psp.o \ + psp_v3_1.o # add SMC block amdgpu-y += \ @@ -63,23 +70,27 @@ amdgpu-y += \ # add GFX block amdgpu-y += \ amdgpu_gfx.o \ - gfx_v8_0.o + gfx_v8_0.o \ + gfx_v9_0.o # add async DMA block amdgpu-y += \ sdma_v2_4.o \ - sdma_v3_0.o + sdma_v3_0.o \ + sdma_v4_0.o # add UVD block amdgpu-y += \ amdgpu_uvd.o \ uvd_v5_0.o \ - uvd_v6_0.o + uvd_v6_0.o \ + uvd_v7_0.o # add VCE block amdgpu-y += \ amdgpu_vce.o \ - vce_v3_0.o + vce_v3_0.o \ + vce_v4_0.o # add amdkfd interfaces amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c1b913541739..6a8129949333 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -32,7 +32,7 @@ #include <linux/wait.h> #include <linux/list.h> #include <linux/kref.h> -#include <linux/interval_tree.h> +#include <linux/rbtree.h> #include <linux/hashtable.h> #include <linux/dma-fence.h> @@ -52,6 +52,7 @@ #include "amdgpu_irq.h" #include "amdgpu_ucode.h" #include "amdgpu_ttm.h" +#include "amdgpu_psp.h" #include "amdgpu_gds.h" #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -59,6 +60,8 @@ #include "amd_powerplay.h" #include "amdgpu_dpm.h" #include "amdgpu_acp.h" +#include "amdgpu_uvd.h" +#include "amdgpu_vce.h" #include "gpu_scheduler.h" #include "amdgpu_virt.h" @@ -79,7 +82,7 @@ extern int amdgpu_pcie_gen2; extern int amdgpu_msi; extern int amdgpu_lockup_timeout; extern int amdgpu_dpm; -extern int amdgpu_smc_load_fw; +extern int amdgpu_fw_load_type; extern int amdgpu_aspm; extern int amdgpu_runtime_pm; extern unsigned amdgpu_ip_block_mask; @@ -101,6 +104,11 @@ extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern unsigned amdgpu_pp_feature_mask; extern int amdgpu_vram_page_split; +extern int amdgpu_ngg; +extern int amdgpu_prim_buf_per_se; +extern int amdgpu_pos_buf_per_se; +extern int amdgpu_cntl_sb_buf_per_se; +extern int amdgpu_param_buf_per_se; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -109,14 +117,11 @@ extern int amdgpu_vram_page_split; #define AMDGPU_IB_POOL_SIZE 16 #define AMDGPU_DEBUGFS_MAX_COMPONENTS 32 #define AMDGPUFB_CONN_LIMIT 4 -#define AMDGPU_BIOS_NUM_SCRATCH 8 +#define AMDGPU_BIOS_NUM_SCRATCH 16 /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 2 -/* hardcode that limit for now */ -#define AMDGPU_VA_RESERVED_SIZE (8 << 20) - /* hard reset data */ #define AMDGPU_ASIC_RESET_DATA 0x39d5e86b @@ -280,7 +285,7 @@ struct amdgpu_vm_pte_funcs { void (*set_pte_pde)(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags); + uint32_t incr, uint64_t flags); }; /* provided by the gmc block */ @@ -293,7 +298,15 @@ struct amdgpu_gart_funcs { void *cpu_pt_addr, /* cpu addr of page table */ uint32_t gpu_page_idx, /* pte/pde to update */ uint64_t addr, /* addr to write into pte/pde */ - uint32_t flags); /* access flags */ + uint64_t flags); /* access flags */ + /* enable/disable PRT support */ + void (*set_prt)(struct amdgpu_device *adev, bool enable); + /* set pte flags based per asic */ + uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, + uint32_t flags); + /* adjust mc addr in fb for APU case */ + u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr); + uint32_t (*get_invalidate_req)(unsigned int vm_id); }; /* provided by the ih block */ @@ -355,7 +368,10 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va_mapping { struct list_head list; - struct interval_tree_node it; + struct rb_node rb; + uint64_t start; + uint64_t last; + uint64_t __subtree_last; uint64_t offset; uint64_t flags; }; @@ -522,6 +538,10 @@ struct amdgpu_gart { struct page **pages; #endif bool ready; + + /* Asic default pte flags */ + uint64_t gart_pte_flags; + const struct amdgpu_gart_funcs *gart_funcs; }; @@ -537,10 +557,23 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, - dma_addr_t *dma_addr, uint32_t flags); + dma_addr_t *dma_addr, uint64_t flags); int amdgpu_ttm_recover_gart(struct amdgpu_device *adev); /* + * VMHUB structures, functions & helpers + */ +struct amdgpu_vmhub { + uint32_t ctx0_ptb_addr_lo32; + uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_req; + uint32_t vm_inv_eng0_ack; + uint32_t vm_context0_cntl; + uint32_t vm_l2_pro_fault_status; + uint32_t vm_l2_pro_fault_cntl; +}; + +/* * GPU MC structures, functions & helpers */ struct amdgpu_mc { @@ -567,6 +600,14 @@ struct amdgpu_mc { uint32_t vram_type; uint32_t srbm_soft_reset; struct amdgpu_mode_mc_save save; + bool prt_warning; + /* apertures */ + u64 shared_aperture_start; + u64 shared_aperture_end; + u64 private_aperture_start; + u64 private_aperture_end; + /* protects concurrent invalidation */ + spinlock_t invalidate_lock; }; /* @@ -601,6 +642,83 @@ struct amdgpu_doorbell { u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ }; +/* + * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space + */ +typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT +{ + /* + * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in + * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range. + * Compute related doorbells are allocated from 0x00 to 0x8a + */ + + + /* kernel scheduling */ + AMDGPU_DOORBELL64_KIQ = 0x00, + + /* HSA interface queue and debug queue */ + AMDGPU_DOORBELL64_HIQ = 0x01, + AMDGPU_DOORBELL64_DIQ = 0x02, + + /* Compute engines */ + AMDGPU_DOORBELL64_MEC_RING0 = 0x03, + AMDGPU_DOORBELL64_MEC_RING1 = 0x04, + AMDGPU_DOORBELL64_MEC_RING2 = 0x05, + AMDGPU_DOORBELL64_MEC_RING3 = 0x06, + AMDGPU_DOORBELL64_MEC_RING4 = 0x07, + AMDGPU_DOORBELL64_MEC_RING5 = 0x08, + AMDGPU_DOORBELL64_MEC_RING6 = 0x09, + AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, + + /* User queue doorbell range (128 doorbells) */ + AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, + AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, + + /* Graphics engine */ + AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, + + /* + * Other graphics doorbells can be allocated here: from 0x8c to 0xef + * Graphics voltage island aperture 1 + * default non-graphics QWORD index is 0xF0 - 0xFF inclusive + */ + + /* sDMA engines */ + AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, + AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, + + /* Interrupt handler */ + AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */ + AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */ + AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */ + + /* VCN engine use 32 bits doorbell */ + AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_DOORBELL64_VCN2_3 = 0xF9, + AMDGPU_DOORBELL64_VCN4_5 = 0xFA, + AMDGPU_DOORBELL64_VCN6_7 = 0xFB, + + /* overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_DOORBELL64_RING0_1 = 0xF8, + AMDGPU_DOORBELL64_RING2_3 = 0xF9, + AMDGPU_DOORBELL64_RING4_5 = 0xFA, + AMDGPU_DOORBELL64_RING6_7 = 0xFB, + + AMDGPU_DOORBELL64_UVD_RING0_1 = 0xFC, + AMDGPU_DOORBELL64_UVD_RING2_3 = 0xFD, + AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFE, + AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFF, + + AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, + AMDGPU_DOORBELL64_INVALID = 0xFFFF +} AMDGPU_DOORBELL64_ASSIGNMENT; + + void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, phys_addr_t *aperture_base, size_t *aperture_size, @@ -699,6 +817,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); struct amdgpu_fpriv { struct amdgpu_vm vm; + struct amdgpu_bo_va *prt_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; @@ -776,9 +895,12 @@ struct amdgpu_rlc { struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; + struct amdgpu_bo *mec_fw_obj; + u64 mec_fw_gpu_addr; u32 num_pipe; u32 num_mec; u32 num_queue; + void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; }; struct amdgpu_kiq { @@ -810,7 +932,16 @@ struct amdgpu_rb_config { uint32_t raster_config_1; }; -struct amdgpu_gca_config { +struct gb_addr_config { + uint16_t pipe_interleave_size; + uint8_t num_pipes; + uint8_t max_compress_frags; + uint8_t num_banks; + uint8_t num_se; + uint8_t num_rb_per_se; +}; + +struct amdgpu_gfx_config { unsigned max_shader_engines; unsigned max_tile_pipes; unsigned max_cu_per_sh; @@ -839,7 +970,11 @@ struct amdgpu_gca_config { uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; + struct gb_addr_config gb_addr_config_fields; struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; + + /* gfx configure feature */ + uint32_t double_offchip_lds_buf; }; struct amdgpu_cu_info { @@ -857,9 +992,31 @@ struct amdgpu_gfx_funcs { void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); }; +struct amdgpu_ngg_buf { + struct amdgpu_bo *bo; + uint64_t gpu_addr; + uint32_t size; + uint32_t bo_size; +}; + +enum { + PRIM = 0, + POS, + CNTL, + PARAM, + NGG_BUF_MAX +}; + +struct amdgpu_ngg { + struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; + uint32_t gds_reserve_addr; + uint32_t gds_reserve_size; + bool init; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; - struct amdgpu_gca_config config; + struct amdgpu_gfx_config config; struct amdgpu_rlc rlc; struct amdgpu_mec mec; struct amdgpu_kiq kiq; @@ -899,6 +1056,9 @@ struct amdgpu_gfx { /* reset mask */ uint32_t grbm_soft_reset; uint32_t srbm_soft_reset; + bool in_reset; + /* NGG */ + struct amdgpu_ngg ngg; }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -1007,67 +1167,12 @@ struct amdgpu_wb { int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb); void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb); +int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb); +void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb); void amdgpu_get_pcie_info(struct amdgpu_device *adev); /* - * UVD - */ -#define AMDGPU_DEFAULT_UVD_HANDLES 10 -#define AMDGPU_MAX_UVD_HANDLES 40 -#define AMDGPU_UVD_STACK_SIZE (200*1024) -#define AMDGPU_UVD_HEAP_SIZE (256*1024) -#define AMDGPU_UVD_SESSION_SIZE (50*1024) -#define AMDGPU_UVD_FIRMWARE_OFFSET 256 - -struct amdgpu_uvd { - struct amdgpu_bo *vcpu_bo; - void *cpu_addr; - uint64_t gpu_addr; - unsigned fw_version; - void *saved_bo; - unsigned max_handles; - atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; - struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; - struct delayed_work idle_work; - const struct firmware *fw; /* UVD firmware */ - struct amdgpu_ring ring; - struct amdgpu_irq_src irq; - bool address_64_bit; - bool use_ctx_buf; - struct amd_sched_entity entity; - uint32_t srbm_soft_reset; -}; - -/* - * VCE - */ -#define AMDGPU_MAX_VCE_HANDLES 16 -#define AMDGPU_VCE_FIRMWARE_OFFSET 256 - -#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0) -#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1) - -struct amdgpu_vce { - struct amdgpu_bo *vcpu_bo; - uint64_t gpu_addr; - unsigned fw_version; - unsigned fb_version; - atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; - struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; - uint32_t img_size[AMDGPU_MAX_VCE_HANDLES]; - struct delayed_work idle_work; - struct mutex idle_mutex; - const struct firmware *fw; /* VCE firmware */ - struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; - struct amdgpu_irq_src irq; - unsigned harvest_config; - struct amd_sched_entity entity; - uint32_t srbm_soft_reset; - unsigned num_rings; -}; - -/* * SDMA */ struct amdgpu_sdma_instance { @@ -1095,11 +1200,22 @@ struct amdgpu_sdma { /* * Firmware */ +enum amdgpu_firmware_load_type { + AMDGPU_FW_LOAD_DIRECT = 0, + AMDGPU_FW_LOAD_SMU, + AMDGPU_FW_LOAD_PSP, +}; + struct amdgpu_firmware { struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM]; - bool smu_load; + enum amdgpu_firmware_load_type load_type; struct amdgpu_bo *fw_buf; unsigned int fw_size; + unsigned int max_ucodes; + /* firmwares are loaded by psp instead of smu from vega10 */ + const struct amdgpu_psp_funcs *funcs; + struct amdgpu_bo *rbuf; + struct mutex mutex; }; /* @@ -1112,10 +1228,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number); * Testing */ void amdgpu_test_moves(struct amdgpu_device *adev); -void amdgpu_test_ring_sync(struct amdgpu_device *adev, - struct amdgpu_ring *cpA, - struct amdgpu_ring *cpB); -void amdgpu_test_syncing(struct amdgpu_device *adev); /* * MMU Notifier @@ -1202,6 +1314,8 @@ struct amdgpu_asic_funcs { /* static power management */ int (*get_pcie_lanes)(struct amdgpu_device *adev); void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes); + /* get config memsize register */ + u32 (*get_config_memsize)(struct amdgpu_device *adev); }; /* @@ -1342,9 +1456,11 @@ struct amdgpu_device { bool have_disp_power_ref; /* BIOS */ + bool is_atom_fw; uint8_t *bios; uint32_t bios_size; struct amdgpu_bo *stollen_vga_memory; + uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; /* Register/doorbell mmio */ @@ -1391,6 +1507,7 @@ struct amdgpu_device { struct amdgpu_gart gart; struct amdgpu_dummy_page dummy_page; struct amdgpu_vm_manager vm_manager; + struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; /* memory management */ struct amdgpu_mman mman; @@ -1457,6 +1574,9 @@ struct amdgpu_device { /* firmwares */ struct amdgpu_firmware firmware; + /* PSP */ + struct psp_context psp; + /* GDS */ struct amdgpu_gds gds; @@ -1501,23 +1621,32 @@ void amdgpu_device_fini(struct amdgpu_device *adev); int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev); uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, - bool always_indirect); + uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - bool always_indirect); + uint32_t acc_flags); u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); +u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); +void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); /* * Registers read & write functions. */ -#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false) -#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), true) -#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), false)) -#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), false) -#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), true) + +#define AMDGPU_REGS_IDX (1<<0) +#define AMDGPU_REGS_NO_KIQ (1<<1) + +#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) +#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) + +#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) +#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) +#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) +#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0) +#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) @@ -1556,6 +1685,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) +#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) +#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) #define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT #define REG_FIELD_MASK(reg, field) reg##__##field##_MASK @@ -1570,6 +1701,12 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); #define WREG32_FIELD(reg, field, val) \ WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define WREG32_FIELD_OFFSET(reg, offset, field, val) \ + WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + +#define WREG32_FIELD15(ip, idx, reg, field, val) \ + WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) + /* * BIOS helpers. */ @@ -1584,7 +1721,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) { if (ring->count_dw <= 0) DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - ring->ring[ring->wptr++] = v; + ring->ring[ring->wptr++ & ring->buf_mask] = v; ring->wptr &= ring->ptr_mask; ring->count_dw--; } @@ -1597,9 +1734,9 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr if (ring->count_dw < count_dw) { DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); } else { - occupied = ring->wptr & ring->ptr_mask; + occupied = ring->wptr & ring->buf_mask; dst = (void *)&ring->ring[occupied]; - chunk1 = ring->ptr_mask + 1 - occupied; + chunk1 = ring->buf_mask + 1 - occupied; chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; chunk2 = count_dw - chunk1; chunk1 <<= 2; @@ -1650,11 +1787,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) +#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) +#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags)) #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) @@ -1698,6 +1837,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a)) +#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) /* Common functions */ int amdgpu_gpu_reset(struct amdgpu_device *adev); @@ -1723,7 +1863,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); -uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, +uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base); void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc); @@ -1742,12 +1882,14 @@ void amdgpu_unregister_atpx_handler(void); bool amdgpu_has_atpx_dgpu_power_cntl(void); bool amdgpu_is_atpx_hybrid(void); bool amdgpu_atpx_dgpu_req_power_for_displays(void); +bool amdgpu_has_atpx(void); #else static inline void amdgpu_register_atpx_handler(void) {} static inline void amdgpu_unregister_atpx_handler(void) {} static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; } static inline bool amdgpu_is_atpx_hybrid(void) { return false; } static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; } +static inline bool amdgpu_has_atpx(void) { return false; } #endif /* @@ -1762,8 +1904,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev); int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv); void amdgpu_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv); -void amdgpu_driver_preclose_kms(struct drm_device *dev, - struct drm_file *file_priv); int amdgpu_suspend(struct amdgpu_device *adev); int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c index 857ba0897159..3889486f71fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c @@ -74,9 +74,9 @@ static void amdgpu_afmt_calc_cts(uint32_t clock, int *CTS, int *N, int freq) /* Check that we are in spec (not always possible) */ if (n < (128*freq/1500)) - printk(KERN_WARNING "Calculated ACR N value is too small. You may experience audio problems.\n"); + pr_warn("Calculated ACR N value is too small. You may experience audio problems.\n"); if (n > (128*freq/300)) - printk(KERN_WARNING "Calculated ACR N value is too large. You may experience audio problems.\n"); + pr_warn("Calculated ACR N value is too large. You may experience audio problems.\n"); *N = n; *CTS = cts; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 56a86dd5789e..ad4329922f79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -754,6 +754,35 @@ union igp_info { struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_9 info_9; }; +/* + * Return vram width from integrated system info table, if available, + * or 0 if not. + */ +int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); + u16 data_offset, size; + union igp_info *igp_info; + u8 frev, crev; + + /* get any igp specific overrides */ + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 8: + case 9: + return igp_info->info_8.ucUMAChannelNumber * 64; + default: + return 0; + } + } + + return 0; +} + static void amdgpu_atombios_get_igp_ss_overrides(struct amdgpu_device *adev, struct amdgpu_atom_ss *ss, int id) @@ -1748,3 +1777,31 @@ void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le) memcpy(dst, src, num_bytes); #endif } + +int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware); + uint16_t data_offset; + int usage_bytes = 0; + struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage; + + if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { + firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset); + + DRM_DEBUG("atom firmware requested %08x %dkb\n", + le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware), + le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb)); + + usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; + } + ctx->scratch_size_bytes = 0; + if (usage_bytes == 0) + usage_bytes = 20 * 1024; + /* allocate some scratch memory */ + ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL); + if (!ctx->scratch) + return -ENOMEM; + ctx->scratch_size_bytes = usage_bytes; + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 70e9acef5d9c..38d0fe32e5cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -148,6 +148,8 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev); int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev); +int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev); + bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev, struct amdgpu_atom_ss *ss, int id, u32 clock); @@ -215,4 +217,7 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev, u8 voltage_type, u8 *svd_gpio_id, u8 *svc_gpio_id); + +int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c new file mode 100644 index 000000000000..4b9abd68e04f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -0,0 +1,112 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <drm/drmP.h> +#include <drm/amdgpu_drm.h> +#include "amdgpu.h" +#include "atomfirmware.h" +#include "amdgpu_atomfirmware.h" +#include "atom.h" + +#define get_index_into_master_table(master_table, table_name) (offsetof(struct master_table, table_name) / sizeof(uint16_t)) + +bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) +{ + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + uint16_t data_offset; + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, + NULL, NULL, &data_offset)) { + struct atom_firmware_info_v3_1 *firmware_info = + (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + + data_offset); + + if (le32_to_cpu(firmware_info->firmware_capability) & + ATOM_FIRMWARE_CAP_GPU_VIRTUALIZATION) + return true; + } + return false; +} + +void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev) +{ + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + uint16_t data_offset; + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, NULL, + NULL, NULL, &data_offset)) { + struct atom_firmware_info_v3_1 *firmware_info = + (struct atom_firmware_info_v3_1 *)(adev->mode_info.atom_context->bios + + data_offset); + + adev->bios_scratch_reg_offset = + le32_to_cpu(firmware_info->bios_scratch_reg_startaddr); + } +} + +void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) + adev->bios_scratch[i] = RREG32(adev->bios_scratch_reg_offset + i); +} + +void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < AMDGPU_BIOS_NUM_SCRATCH; i++) + WREG32(adev->bios_scratch_reg_offset + i, adev->bios_scratch[i]); +} + +int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + vram_usagebyfirmware); + uint16_t data_offset; + int usage_bytes = 0; + + if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { + struct vram_usagebyfirmware_v2_1 *firmware_usage = + (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); + + DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n", + le32_to_cpu(firmware_usage->start_address_in_kb), + le16_to_cpu(firmware_usage->used_by_firmware_in_kb), + le16_to_cpu(firmware_usage->used_by_driver_in_kb)); + + usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) * 1024; + } + ctx->scratch_size_bytes = 0; + if (usage_bytes == 0) + usage_bytes = 20 * 1024; + /* allocate some scratch memory */ + ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL); + if (!ctx->scratch) + return -ENOMEM; + ctx->scratch_size_bytes = usage_bytes; + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h new file mode 100644 index 000000000000..d0c4dcd7fa96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -0,0 +1,33 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_ATOMFIRMWARE_H__ +#define __AMDGPU_ATOMFIRMWARE_H__ + +bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev); +void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); +void amdgpu_atomfirmware_scratch_regs_save(struct amdgpu_device *adev); +void amdgpu_atomfirmware_scratch_regs_restore(struct amdgpu_device *adev); +int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 6c343a933182..c13c51af0b68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -583,8 +583,8 @@ static bool amdgpu_atpx_detect(void) if (has_atpx && vga_count == 2) { acpi_get_name(amdgpu_atpx_priv.atpx.handle, ACPI_FULL_PATHNAME, &buffer); - printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n", - acpi_method_name); + pr_info("vga_switcheroo: detected switching method %s handle\n", + acpi_method_name); amdgpu_atpx_priv.atpx_detected = true; amdgpu_atpx_priv.bridge_pm_usable = d3_supported; amdgpu_atpx_init(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 821f7cc2051f..365e735f6647 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -86,6 +86,18 @@ static bool check_atom_bios(uint8_t *bios, size_t size) return false; } +static bool is_atom_fw(uint8_t *bios) +{ + uint16_t bios_header_start = bios[0x48] | (bios[0x49] << 8); + uint8_t frev = bios[bios_header_start + 2]; + uint8_t crev = bios[bios_header_start + 3]; + + if ((frev < 3) || + ((frev == 3) && (crev < 3))) + return false; + + return true; +} /* If you boot an IGP board with a discrete card as the primary, * the IGP rom is not accessible via the rom bar as the IGP rom is @@ -419,26 +431,30 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) bool amdgpu_get_bios(struct amdgpu_device *adev) { if (amdgpu_atrm_get_bios(adev)) - return true; + goto success; if (amdgpu_acpi_vfct_bios(adev)) - return true; + goto success; if (igp_read_bios_from_vram(adev)) - return true; + goto success; if (amdgpu_read_bios(adev)) - return true; + goto success; if (amdgpu_read_bios_from_rom(adev)) - return true; + goto success; if (amdgpu_read_disabled_bios(adev)) - return true; + goto success; if (amdgpu_read_platform_bios(adev)) - return true; + goto success; DRM_ERROR("Unable to locate a BIOS ROM\n"); return false; + +success: + adev->is_atom_fw = is_atom_fw(adev->bios); + return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 0218cea6be4d..a6649874e6ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -237,7 +237,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_bo_list *args = data; uint32_t handle = args->in.list_handle; - const void __user *uptr = (const void*)(long)args->in.bo_info_ptr; + const void __user *uptr = (const void*)(uintptr_t)args->in.bo_info_ptr; struct drm_amdgpu_bo_list_entry *info; struct amdgpu_bo_list *list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index d9e5aa4a79ef..1c7e6c28f93a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -571,7 +571,9 @@ static const struct amdgpu_irq_src_funcs cgs_irq_funcs = { .process = cgs_process_irq, }; -static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src_id, +static int amdgpu_cgs_add_irq_source(void *cgs_device, + unsigned client_id, + unsigned src_id, unsigned num_types, cgs_irq_source_set_func_t set, cgs_irq_handler_func_t handler, @@ -597,7 +599,7 @@ static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src irq_params->handler = handler; irq_params->private_data = private_data; source->data = (void *)irq_params; - ret = amdgpu_irq_add_id(adev, src_id, source); + ret = amdgpu_irq_add_id(adev, client_id, src_id, source); if (ret) { kfree(irq_params); kfree(source); @@ -606,16 +608,26 @@ static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src return ret; } -static int amdgpu_cgs_irq_get(struct cgs_device *cgs_device, unsigned src_id, unsigned type) +static int amdgpu_cgs_irq_get(void *cgs_device, unsigned client_id, + unsigned src_id, unsigned type) { CGS_FUNC_ADEV; - return amdgpu_irq_get(adev, adev->irq.sources[src_id], type); + + if (!adev->irq.client[client_id].sources) + return -EINVAL; + + return amdgpu_irq_get(adev, adev->irq.client[client_id].sources[src_id], type); } -static int amdgpu_cgs_irq_put(struct cgs_device *cgs_device, unsigned src_id, unsigned type) +static int amdgpu_cgs_irq_put(void *cgs_device, unsigned client_id, + unsigned src_id, unsigned type) { CGS_FUNC_ADEV; - return amdgpu_irq_put(adev, adev->irq.sources[src_id], type); + + if (!adev->irq.client[client_id].sources) + return -EINVAL; + + return amdgpu_irq_put(adev, adev->irq.client[client_id].sources[src_id], type); } static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, @@ -825,9 +837,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, uint32_t ucode_start_address; const uint8_t *src; const struct smc_firmware_header_v1_0 *hdr; - - if (CGS_UCODE_ID_SMU_SK == type) - amdgpu_cgs_rel_firmware(cgs_device, CGS_UCODE_ID_SMU); + const struct common_firmware_header *header; + struct amdgpu_firmware_info *ucode = NULL; if (!adev->pm.fw) { switch (adev->asic_type) { @@ -889,6 +900,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, case CHIP_POLARIS12: strcpy(fw_name, "amdgpu/polaris12_smc.bin"); break; + case CHIP_VEGA10: + strcpy(fw_name, "amdgpu/vega10_smc.bin"); + break; default: DRM_ERROR("SMC firmware not supported\n"); return -EINVAL; @@ -907,6 +921,15 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, adev->pm.fw = NULL; return err; } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; + ucode->ucode_id = AMDGPU_UCODE_ID_SMC; + ucode->fw = adev->pm.fw; + header = (const struct common_firmware_header *)ucode->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } } hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 99424cb8020b..ec71b9320561 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -82,6 +82,15 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, return -EINVAL; } break; + case AMDGPU_HW_IP_UVD_ENC: + if (ring < adev->uvd.num_enc_rings){ + *out_ring = &adev->uvd.ring_enc[ring]; + } else { + DRM_ERROR("only %d UVD ENC rings are supported\n", + adev->uvd.num_enc_rings); + return -EINVAL; + } + break; } if (!(*out_ring && (*out_ring)->adev)) { @@ -152,7 +161,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } /* get chunks */ - chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks); + chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; @@ -172,7 +181,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; - chunk_ptr = (void __user *)(unsigned long)chunk_array[i]; + chunk_ptr = (void __user *)(uintptr_t)chunk_array[i]; if (copy_from_user(&user_chunk, chunk_ptr, sizeof(struct drm_amdgpu_cs_chunk))) { ret = -EFAULT; @@ -183,7 +192,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) p->chunks[i].length_dw = user_chunk.length_dw; size = p->chunks[i].length_dw; - cdata = (void __user *)(unsigned long)user_chunk.chunk_data; + cdata = (void __user *)(uintptr_t)user_chunk.chunk_data; p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); if (p->chunks[i].kdata == NULL) { @@ -759,23 +768,33 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_bo_unref(&parser->uf_entry.robj); } -static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, - struct amdgpu_vm *vm) +static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) { struct amdgpu_device *adev = p->adev; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va *bo_va; struct amdgpu_bo *bo; int i, r; - r = amdgpu_vm_update_page_directory(adev, vm); + r = amdgpu_vm_update_directories(adev, vm); if (r) return r; - r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence); + r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update); if (r) return r; - r = amdgpu_vm_clear_freed(adev, vm); + r = amdgpu_vm_clear_freed(adev, vm, NULL); + if (r) + return r; + + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); + if (r) + return r; + + r = amdgpu_sync_fence(adev, &p->job->sync, + fpriv->prt_va->last_pt_update); if (r) return r; @@ -853,9 +872,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, } if (p->job->vm) { - p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); + p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo); - r = amdgpu_bo_vm_update_pte(p, vm); + r = amdgpu_bo_vm_update_pte(p); if (r) return r; } @@ -869,7 +888,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; int i, j; - int r; + int r, ce_preempt = 0, de_preempt = 0; for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { struct amdgpu_cs_chunk *chunk; @@ -884,13 +903,26 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) continue; + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) + ce_preempt++; + else + de_preempt++; + } + + /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ + if (ce_preempt > 1 || de_preempt > 1) + return -EINVAL; + } + r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, chunk_ib->ip_instance, chunk_ib->ring, &ring); if (r) return r; - if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) { parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; if (!parser->ctx->preamble_presented) { parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; @@ -917,7 +949,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, } if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("IB va_start+ib_bytes is invalid\n"); return -EINVAL; } @@ -928,7 +960,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return r; } - offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE; + offset = m->start * AMDGPU_GPU_PAGE_SIZE; kptr += chunk_ib->va_start - offset; r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib); @@ -1210,6 +1242,7 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, continue; r = dma_fence_wait_timeout(fence, true, timeout); + dma_fence_put(fence); if (r < 0) return r; @@ -1307,7 +1340,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, if (fences == NULL) return -ENOMEM; - fences_user = (void __user *)(unsigned long)(wait->in.fences); + fences_user = (void __user *)(uintptr_t)(wait->in.fences); if (copy_from_user(fences, fences_user, sizeof(struct drm_amdgpu_fence) * fence_count)) { r = -EFAULT; @@ -1356,8 +1389,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, continue; list_for_each_entry(mapping, &lobj->bo_va->valids, list) { - if (mapping->it.start > addr || - addr > mapping->it.last) + if (mapping->start > addr || + addr > mapping->last) continue; *bo = lobj->bo_va->bo; @@ -1365,8 +1398,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, } list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { - if (mapping->it.start > addr || - addr > mapping->it.last) + if (mapping->start > addr || + addr > mapping->last) continue; *bo = lobj->bo_va->bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a3a105ec99e2..483660742f75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -40,6 +40,7 @@ #include "amdgpu_i2c.h" #include "atom.h" #include "amdgpu_atombios.h" +#include "amdgpu_atomfirmware.h" #include "amd_pcie.h" #ifdef CONFIG_DRM_AMDGPU_SI #include "si.h" @@ -48,9 +49,11 @@ #include "cik.h" #endif #include "vi.h" +#include "soc15.h" #include "bif/bif_4_1_d.h" #include <linux/pci.h> #include <linux/firmware.h> +#include "amdgpu_pm.h" static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); @@ -74,6 +77,7 @@ static const char *amdgpu_asic_name[] = { "POLARIS10", "POLARIS11", "POLARIS12", + "VEGA10", "LAST", }; @@ -90,16 +94,16 @@ bool amdgpu_device_is_px(struct drm_device *dev) * MMIO register access helper functions. */ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, - bool always_indirect) + uint32_t acc_flags) { uint32_t ret; - if (amdgpu_sriov_runtime(adev)) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { BUG_ON(in_interrupt()); return amdgpu_virt_kiq_rreg(adev, reg); } - if ((reg * 4) < adev->rmmio_size && !always_indirect) + if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); else { unsigned long flags; @@ -114,16 +118,16 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, } void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, - bool always_indirect) + uint32_t acc_flags) { trace_amdgpu_mm_wreg(adev->pdev->device, reg, v); - if (amdgpu_sriov_runtime(adev)) { + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { BUG_ON(in_interrupt()); return amdgpu_virt_kiq_wreg(adev, reg, v); } - if ((reg * 4) < adev->rmmio_size && !always_indirect) + if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); else { unsigned long flags; @@ -195,6 +199,44 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) } /** + * amdgpu_mm_rdoorbell64 - read a doorbell Qword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * + * Returns the value in the doorbell aperture at the + * requested doorbell index (VEGA10+). + */ +u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) +{ + if (index < adev->doorbell.num_doorbells) { + return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); + } else { + DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + return 0; + } +} + +/** + * amdgpu_mm_wdoorbell64 - write a doorbell Qword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * @v: value to write + * + * Writes @v to the doorbell aperture at the + * requested doorbell index (VEGA10+). + */ +void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) +{ + if (index < adev->doorbell.num_doorbells) { + atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); + } else { + DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); + } +} + +/** * amdgpu_invalid_rreg - dummy reg read function * * @adev: amdgpu device pointer @@ -475,7 +517,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) int r; if (adev->wb.wb_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * 4, + r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->wb.wb_obj, &adev->wb.gpu_addr, (void **)&adev->wb.wb); @@ -488,7 +530,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) memset(&adev->wb.used, 0, sizeof(adev->wb.used)); /* clear wb memory */ - memset((char *)adev->wb.wb, 0, AMDGPU_GPU_PAGE_SIZE); + memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t)); } return 0; @@ -516,6 +558,29 @@ int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb) } /** + * amdgpu_wb_get_64bit - Allocate a wb entry + * + * @adev: amdgpu_device pointer + * @wb: wb index + * + * Allocate a wb slot for use by the driver (all asics). + * Returns 0 on success or -EINVAL on failure. + */ +int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb) +{ + unsigned long offset = bitmap_find_next_zero_area_off(adev->wb.used, + adev->wb.num_wb, 0, 2, 7, 0); + if ((offset + 1) < adev->wb.num_wb) { + __set_bit(offset, adev->wb.used); + __set_bit(offset + 1, adev->wb.used); + *wb = offset; + return 0; + } else { + return -EINVAL; + } +} + +/** * amdgpu_wb_free - Free a wb entry * * @adev: amdgpu_device pointer @@ -530,6 +595,22 @@ void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb) } /** + * amdgpu_wb_free_64bit - Free a wb entry + * + * @adev: amdgpu_device pointer + * @wb: wb index + * + * Free a wb slot allocated for use by the driver (all asics) + */ +void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) +{ + if ((wb + 1) < adev->wb.num_wb) { + __clear_bit(wb, adev->wb.used); + __clear_bit(wb + 1, adev->wb.used); + } +} + +/** * amdgpu_vram_location - try to find VRAM location * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations @@ -602,7 +683,7 @@ void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) dev_warn(adev->dev, "limiting GTT\n"); mc->gtt_size = size_bf; } - mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size; + mc->gtt_start = 0; } else { if (mc->gtt_size > size_af) { dev_warn(adev->dev, "limiting GTT\n"); @@ -636,9 +717,9 @@ bool amdgpu_need_post(struct amdgpu_device *adev) return true; } /* then check MEM_SIZE, in case the crtcs are off */ - reg = RREG32(mmCONFIG_MEMSIZE); + reg = amdgpu_asic_get_config_memsize(adev); - if (reg) + if ((reg != 0) && (reg != 0xffffffff)) return false; return true; @@ -915,8 +996,13 @@ static int amdgpu_atombios_init(struct amdgpu_device *adev) } mutex_init(&adev->mode_info.atom_context->mutex); - amdgpu_atombios_scratch_regs_init(adev); - amdgpu_atom_allocate_fb_scratch(adev->mode_info.atom_context); + if (adev->is_atom_fw) { + amdgpu_atomfirmware_scratch_regs_init(adev); + amdgpu_atomfirmware_allocate_fb_scratch(adev); + } else { + amdgpu_atombios_scratch_regs_init(adev); + amdgpu_atombios_allocate_fb_scratch(adev); + } return 0; } @@ -954,6 +1040,62 @@ static bool amdgpu_check_pot_argument(int arg) return (arg & (arg - 1)) == 0; } +static void amdgpu_check_block_size(struct amdgpu_device *adev) +{ + /* defines number of bits in page table versus page directory, + * a page is 4KB so we have 12 bits offset, minimum 9 bits in the + * page table and the remaining bits are in the page directory */ + if (amdgpu_vm_block_size == -1) + return; + + if (amdgpu_vm_block_size < 9) { + dev_warn(adev->dev, "VM page table size (%d) too small\n", + amdgpu_vm_block_size); + goto def_value; + } + + if (amdgpu_vm_block_size > 24 || + (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) { + dev_warn(adev->dev, "VM page table size (%d) too large\n", + amdgpu_vm_block_size); + goto def_value; + } + + return; + +def_value: + amdgpu_vm_block_size = -1; +} + +static void amdgpu_check_vm_size(struct amdgpu_device *adev) +{ + if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { + dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", + amdgpu_vm_size); + goto def_value; + } + + if (amdgpu_vm_size < 1) { + dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", + amdgpu_vm_size); + goto def_value; + } + + /* + * Max GPUVM size for Cayman, SI, CI VI are 40 bits. + */ + if (amdgpu_vm_size > 1024) { + dev_warn(adev->dev, "VM size (%d) too large, max is 1TB\n", + amdgpu_vm_size); + goto def_value; + } + + return; + +def_value: + amdgpu_vm_size = -1; +} + /** * amdgpu_check_arguments - validate module params * @@ -983,54 +1125,9 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) } } - if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { - dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", - amdgpu_vm_size); - amdgpu_vm_size = 8; - } - - if (amdgpu_vm_size < 1) { - dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", - amdgpu_vm_size); - amdgpu_vm_size = 8; - } - - /* - * Max GPUVM size for Cayman, SI and CI are 40 bits. - */ - if (amdgpu_vm_size > 1024) { - dev_warn(adev->dev, "VM size (%d) too large, max is 1TB\n", - amdgpu_vm_size); - amdgpu_vm_size = 8; - } - - /* defines number of bits in page table versus page directory, - * a page is 4KB so we have 12 bits offset, minimum 9 bits in the - * page table and the remaining bits are in the page directory */ - if (amdgpu_vm_block_size == -1) { - - /* Total bits covered by PD + PTs */ - unsigned bits = ilog2(amdgpu_vm_size) + 18; - - /* Make sure the PD is 4K in size up to 8GB address space. - Above that split equal between PD and PTs */ - if (amdgpu_vm_size <= 8) - amdgpu_vm_block_size = bits - 9; - else - amdgpu_vm_block_size = (bits + 3) / 2; + amdgpu_check_vm_size(adev); - } else if (amdgpu_vm_block_size < 9) { - dev_warn(adev->dev, "VM page table size (%d) too small\n", - amdgpu_vm_block_size); - amdgpu_vm_block_size = 9; - } - - if (amdgpu_vm_block_size > 24 || - (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) { - dev_warn(adev->dev, "VM page table size (%d) too large\n", - amdgpu_vm_block_size); - amdgpu_vm_block_size = 9; - } + amdgpu_check_block_size(adev); if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 || !amdgpu_check_pot_argument(amdgpu_vram_page_split))) { @@ -1059,7 +1156,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero if (state == VGA_SWITCHEROO_ON) { unsigned d3_delay = dev->pdev->d3_delay; - printk(KERN_INFO "amdgpu: switched on\n"); + pr_info("amdgpu: switched on\n"); /* don't suspend or resume card normally */ dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; @@ -1070,7 +1167,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero dev->switch_power_state = DRM_SWITCH_POWER_ON; drm_kms_helper_poll_enable(dev); } else { - printk(KERN_INFO "amdgpu: switched off\n"); + pr_info("amdgpu: switched off\n"); drm_kms_helper_poll_disable(dev); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; amdgpu_device_suspend(dev, true, true); @@ -1114,13 +1211,15 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev, for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, - state); - if (r) - return r; - break; - } + if (adev->ip_blocks[i].version->type != block_type) + continue; + if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) + continue; + r = adev->ip_blocks[i].version->funcs->set_clockgating_state( + (void *)adev, state); + if (r) + DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } return r; } @@ -1134,13 +1233,15 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev, for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->type == block_type) { - r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, - state); - if (r) - return r; - break; - } + if (adev->ip_blocks[i].version->type != block_type) + continue; + if (!adev->ip_blocks[i].version->funcs->set_powergating_state) + continue; + r = adev->ip_blocks[i].version->funcs->set_powergating_state( + (void *)adev, state); + if (r) + DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); } return r; } @@ -1345,6 +1446,13 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return r; break; #endif + case CHIP_VEGA10: + adev->family = AMDGPU_FAMILY_AI; + + r = soc15_set_ip_blocks(adev); + if (r) + return r; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1476,6 +1584,9 @@ static int amdgpu_late_init(struct amdgpu_device *adev) } } + amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vce(adev, false); + return 0; } @@ -1607,6 +1718,53 @@ int amdgpu_suspend(struct amdgpu_device *adev) return 0; } +static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + + return 0; +} + +static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) + continue; + + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + + return 0; +} + static int amdgpu_resume(struct amdgpu_device *adev) { int i, r; @@ -1627,8 +1785,13 @@ static int amdgpu_resume(struct amdgpu_device *adev) static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { - if (amdgpu_atombios_has_gpu_virtualization_table(adev)) - adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + if (adev->is_atom_fw) { + if (amdgpu_atomfirmware_gpu_supports_virtualization(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } else { + if (amdgpu_atombios_has_gpu_virtualization_table(adev)) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; + } } /** @@ -1693,6 +1856,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, * can recall function without having locking issues */ mutex_init(&adev->vm_manager.lock); atomic_set(&adev->irq.ih.lock, 0); + mutex_init(&adev->firmware.mutex); mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); @@ -1763,7 +1927,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, runtime = true; if (amdgpu_device_is_px(ddev)) runtime = true; - vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime); + if (!pci_is_thunderbolt_attached(adev->pdev)) + vga_switcheroo_register_client(adev->pdev, + &amdgpu_switcheroo_ops, runtime); if (runtime) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); @@ -1799,14 +1965,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("GPU post is not needed\n"); } - /* Initialize clocks */ - r = amdgpu_atombios_get_clock_info(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); - goto failed; + if (!adev->is_atom_fw) { + /* Initialize clocks */ + r = amdgpu_atombios_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); + return r; + } + /* init i2c buses */ + amdgpu_atombios_i2c_init(adev); } - /* init i2c buses */ - amdgpu_atombios_i2c_init(adev); /* Fence driver */ r = amdgpu_fence_driver_init(adev); @@ -1835,8 +2003,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); - amdgpu_fbdev_init(adev); - r = amdgpu_ib_pool_init(adev); if (r) { dev_err(adev->dev, "IB initialization failed (%d).\n", r); @@ -1847,21 +2013,19 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("ib ring test failed (%d).\n", r); + amdgpu_fbdev_init(adev); + r = amdgpu_gem_debugfs_init(adev); - if (r) { + if (r) DRM_ERROR("registering gem debugfs failed (%d).\n", r); - } r = amdgpu_debugfs_regs_init(adev); - if (r) { + if (r) DRM_ERROR("registering register debugfs failed (%d).\n", r); - } r = amdgpu_debugfs_firmware_init(adev); - if (r) { + if (r) DRM_ERROR("registering firmware debugfs failed (%d).\n", r); - return r; - } if ((amdgpu_testing & 1)) { if (adev->accel_working) @@ -1869,12 +2033,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, else DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); } - if ((amdgpu_testing & 2)) { - if (adev->accel_working) - amdgpu_test_syncing(adev); - else - DRM_INFO("amdgpu: acceleration disabled, skipping sync tests\n"); - } if (amdgpu_benchmarking) { if (adev->accel_working) amdgpu_benchmark(adev, amdgpu_benchmarking); @@ -1926,7 +2084,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_atombios_fini(adev); kfree(adev->bios); adev->bios = NULL; - vga_switcheroo_unregister_client(adev->pdev); + if (!pci_is_thunderbolt_attached(adev->pdev)) + vga_switcheroo_unregister_client(adev->pdev); if (adev->flags & AMD_IS_PX) vga_switcheroo_fini_domain_pm_ops(adev->dev); vga_client_register(adev->pdev, NULL, NULL, NULL); @@ -2020,7 +2179,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) */ amdgpu_bo_evict_vram(adev); - amdgpu_atombios_scratch_regs_save(adev); + if (adev->is_atom_fw) + amdgpu_atomfirmware_scratch_regs_save(adev); + else + amdgpu_atombios_scratch_regs_save(adev); pci_save_state(dev->pdev); if (suspend) { /* Shut down the device */ @@ -2072,7 +2234,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) return r; } } - amdgpu_atombios_scratch_regs_restore(adev); + if (adev->is_atom_fw) + amdgpu_atomfirmware_scratch_regs_restore(adev); + else + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ if (amdgpu_need_post(adev)) { @@ -2082,9 +2247,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_resume(adev); - if (r) + if (r) { DRM_ERROR("amdgpu_resume failed (%d).\n", r); - + return r; + } amdgpu_fence_driver_resume(adev); if (resume) { @@ -2286,6 +2452,117 @@ err: } /** + * amdgpu_sriov_gpu_reset - reset the asic + * + * @adev: amdgpu device pointer + * @voluntary: if this reset is requested by guest. + * (true means by guest and false means by HYPERVISOR ) + * + * Attempt the reset the GPU if it has hung (all asics). + * for SRIOV case. + * Returns 0 for success or an error on failure. + */ +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) +{ + int i, r = 0; + int resched; + struct amdgpu_bo *bo, *tmp; + struct amdgpu_ring *ring; + struct dma_fence *fence = NULL, *next = NULL; + + mutex_lock(&adev->virt.lock_reset); + atomic_inc(&adev->gpu_reset_counter); + adev->gfx.in_reset = true; + + /* block TTM */ + resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + + /* block scheduler */ + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + + kthread_park(ring->sched.thread); + amd_sched_hw_job_reset(&ring->sched); + } + + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion(adev); + + /* request to take full control of GPU before re-initialization */ + if (voluntary) + amdgpu_virt_reset_gpu(adev); + else + amdgpu_virt_request_full_gpu(adev, true); + + + /* Resume IP prior to SMC */ + amdgpu_sriov_reinit_early(adev); + + /* we need recover gart prior to run SMC/CP/SDMA resume */ + amdgpu_ttm_recover_gart(adev); + + /* now we are okay to resume SMC/CP/SDMA */ + amdgpu_sriov_reinit_late(adev); + + amdgpu_irq_gpu_reset_resume_helper(adev); + + if (amdgpu_ib_ring_tests(adev)) + dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); + + /* release full control of GPU after ib test */ + amdgpu_virt_release_full_gpu(adev, true); + + DRM_INFO("recover vram bo from shadow\n"); + + ring = adev->mman.buffer_funcs_ring; + mutex_lock(&adev->shadow_list_lock); + list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { + amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); + if (fence) { + r = dma_fence_wait(fence, false); + if (r) { + WARN(r, "recovery from shadow isn't completed\n"); + break; + } + } + + dma_fence_put(fence); + fence = next; + } + mutex_unlock(&adev->shadow_list_lock); + + if (fence) { + r = dma_fence_wait(fence, false); + if (r) + WARN(r, "recovery from shadow isn't completed\n"); + } + dma_fence_put(fence); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + if (!ring || !ring->sched.thread) + continue; + + amd_sched_job_recovery(&ring->sched); + kthread_unpark(ring->sched.thread); + } + + drm_helper_resume_force_mode(adev->ddev); + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); + if (r) { + /* bad news, how to tell it to userspace ? */ + dev_info(adev->dev, "GPU reset failed\n"); + } + + adev->gfx.in_reset = false; + mutex_unlock(&adev->virt.lock_reset); + return r; +} + +/** * amdgpu_gpu_reset - reset the asic * * @adev: amdgpu device pointer @@ -2300,7 +2577,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) bool need_full_reset; if (amdgpu_sriov_vf(adev)) - return 0; + return amdgpu_sriov_gpu_reset(adev, true); if (!amdgpu_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); @@ -2346,9 +2623,15 @@ retry: amdgpu_display_stop_mc_access(adev, &save); amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); } - amdgpu_atombios_scratch_regs_save(adev); + if (adev->is_atom_fw) + amdgpu_atomfirmware_scratch_regs_save(adev); + else + amdgpu_atombios_scratch_regs_save(adev); r = amdgpu_asic_reset(adev); - amdgpu_atombios_scratch_regs_restore(adev); + if (adev->is_atom_fw) + amdgpu_atomfirmware_scratch_regs_restore(adev); + else + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -2387,7 +2670,7 @@ retry: if (fence) { r = dma_fence_wait(fence, false); if (r) { - WARN(r, "recovery from shadow isn't comleted\n"); + WARN(r, "recovery from shadow isn't completed\n"); break; } } @@ -2399,7 +2682,7 @@ retry: if (fence) { r = dma_fence_wait(fence, false); if (r) - WARN(r, "recovery from shadow isn't comleted\n"); + WARN(r, "recovery from shadow isn't completed\n"); } dma_fence_put(fence); } @@ -2954,24 +3237,42 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { struct amdgpu_device *adev = file_inode(f)->i_private; - int idx, r; - int32_t value; + int idx, x, outsize, r, valuesize; + uint32_t values[16]; + + if (size & 3 || *pos & 0x3) + return -EINVAL; - if (size != 4 || *pos & 0x3) + if (amdgpu_dpm == 0) return -EINVAL; /* convert offset to sensor number */ idx = *pos >> 2; + valuesize = sizeof(values); if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->read_sensor) - r = adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, idx, &value); + r = adev->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, idx, &values[0], &valuesize); + else if (adev->pm.funcs && adev->pm.funcs->read_sensor) + r = adev->pm.funcs->read_sensor(adev, idx, &values[0], + &valuesize); else return -EINVAL; - if (!r) - r = put_user(value, (int32_t *)buf); + if (size > valuesize) + return -EINVAL; + + outsize = 0; + x = 0; + if (!r) { + while (size) { + r = put_user(values[x++], (int32_t *)buf); + buf += 4; + size -= 4; + outsize += 4; + } + } - return !r ? 4 : r; + return !r ? outsize : r; } static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 39fc388f222a..96926a221bd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -311,7 +311,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t page_flip_flags, - uint32_t target) + uint32_t target, + struct drm_modeset_acquire_ctx *ctx) { struct amdgpu_bo *new_abo; struct amdgpu_flip_work *work; @@ -332,7 +333,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, return 0; } -int amdgpu_crtc_set_config(struct drm_mode_set *set) +int amdgpu_crtc_set_config(struct drm_mode_set *set, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev; struct amdgpu_device *adev; @@ -349,7 +351,7 @@ int amdgpu_crtc_set_config(struct drm_mode_set *set) if (ret < 0) return ret; - ret = drm_crtc_helper_set_config(set); + ret = drm_crtc_helper_set_config(set, ctx); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) if (crtc->enabled) @@ -612,6 +614,12 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOENT); } + /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ + if (obj->import_attach) { + DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n"); + return ERR_PTR(-EINVAL); + } + amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL); if (amdgpu_fb == NULL) { drm_gem_object_unreference_unlocked(obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 6ca0333ca4c0..38e9b0d3659a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -31,86 +31,88 @@ void amdgpu_dpm_print_class_info(u32 class, u32 class2) { - printk("\tui class: "); + const char *s; + switch (class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) { case ATOM_PPLIB_CLASSIFICATION_UI_NONE: default: - printk("none\n"); + s = "none"; break; case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY: - printk("battery\n"); + s = "battery"; break; case ATOM_PPLIB_CLASSIFICATION_UI_BALANCED: - printk("balanced\n"); + s = "balanced"; break; case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE: - printk("performance\n"); + s = "performance"; break; } - printk("\tinternal class: "); + printk("\tui class: %s\n", s); + printk("\tinternal class:"); if (((class & ~ATOM_PPLIB_CLASSIFICATION_UI_MASK) == 0) && (class2 == 0)) - printk("none"); + pr_cont(" none"); else { if (class & ATOM_PPLIB_CLASSIFICATION_BOOT) - printk("boot "); + pr_cont(" boot"); if (class & ATOM_PPLIB_CLASSIFICATION_THERMAL) - printk("thermal "); + pr_cont(" thermal"); if (class & ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE) - printk("limited_pwr "); + pr_cont(" limited_pwr"); if (class & ATOM_PPLIB_CLASSIFICATION_REST) - printk("rest "); + pr_cont(" rest"); if (class & ATOM_PPLIB_CLASSIFICATION_FORCED) - printk("forced "); + pr_cont(" forced"); if (class & ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE) - printk("3d_perf "); + pr_cont(" 3d_perf"); if (class & ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE) - printk("ovrdrv "); + pr_cont(" ovrdrv"); if (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) - printk("uvd "); + pr_cont(" uvd"); if (class & ATOM_PPLIB_CLASSIFICATION_3DLOW) - printk("3d_low "); + pr_cont(" 3d_low"); if (class & ATOM_PPLIB_CLASSIFICATION_ACPI) - printk("acpi "); + pr_cont(" acpi"); if (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE) - printk("uvd_hd2 "); + pr_cont(" uvd_hd2"); if (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE) - printk("uvd_hd "); + pr_cont(" uvd_hd"); if (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE) - printk("uvd_sd "); + pr_cont(" uvd_sd"); if (class2 & ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2) - printk("limited_pwr2 "); + pr_cont(" limited_pwr2"); if (class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) - printk("ulv "); + pr_cont(" ulv"); if (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC) - printk("uvd_mvc "); + pr_cont(" uvd_mvc"); } - printk("\n"); + pr_cont("\n"); } void amdgpu_dpm_print_cap_info(u32 caps) { - printk("\tcaps: "); + printk("\tcaps:"); if (caps & ATOM_PPLIB_SINGLE_DISPLAY_ONLY) - printk("single_disp "); + pr_cont(" single_disp"); if (caps & ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK) - printk("video "); + pr_cont(" video"); if (caps & ATOM_PPLIB_DISALLOW_ON_DC) - printk("no_dc "); - printk("\n"); + pr_cont(" no_dc"); + pr_cont("\n"); } void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, struct amdgpu_ps *rps) { - printk("\tstatus: "); + printk("\tstatus:"); if (rps == adev->pm.dpm.current_ps) - printk("c "); + pr_cont(" c"); if (rps == adev->pm.dpm.requested_ps) - printk("r "); + pr_cont(" r"); if (rps == adev->pm.dpm.boot_ps) - printk("b "); - printk("\n"); + pr_cont(" b"); + pr_cont("\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index fa2b55681422..8c96a4caa715 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -270,8 +270,18 @@ struct amdgpu_dpm_funcs { struct amdgpu_ps *cps, struct amdgpu_ps *rps, bool *equal); + int (*read_sensor)(struct amdgpu_device *adev, int idx, void *value, + int *size); struct amd_vce_state* (*get_vce_clock_state)(struct amdgpu_device *adev, unsigned idx); + int (*reset_power_profile_state)(struct amdgpu_device *adev, + struct amd_pp_profile *request); + int (*get_power_profile_state)(struct amdgpu_device *adev, + struct amd_pp_profile *query); + int (*set_power_profile_state)(struct amdgpu_device *adev, + struct amd_pp_profile *request); + int (*switch_power_profile)(struct amdgpu_device *adev, + enum amd_pp_profile_type type); }; #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) @@ -282,10 +292,10 @@ struct amdgpu_dpm_funcs { #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev)) #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e)) -#define amdgpu_dpm_read_sensor(adev, idx, value) \ +#define amdgpu_dpm_read_sensor(adev, idx, value, size) \ ((adev)->pp_enabled ? \ - (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value)) : \ - -EINVAL) + (adev)->powerplay.pp_funcs->read_sensor(adev->powerplay.pp_handle, (idx), (value), (size)) : \ + (adev)->pm.funcs->read_sensor((adev), (idx), (value), (size))) #define amdgpu_dpm_get_temperature(adev) \ ((adev)->pp_enabled ? \ @@ -388,6 +398,22 @@ struct amdgpu_dpm_funcs { (adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle) : \ (adev)->pm.dpm.forced_level) +#define amdgpu_dpm_reset_power_profile_state(adev, request) \ + ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ + (adev)->powerplay.pp_handle, request)) + +#define amdgpu_dpm_get_power_profile_state(adev, query) \ + ((adev)->powerplay.pp_funcs->get_power_profile_state(\ + (adev)->powerplay.pp_handle, query)) + +#define amdgpu_dpm_set_power_profile_state(adev, request) \ + ((adev)->powerplay.pp_funcs->set_power_profile_state(\ + (adev)->powerplay.pp_handle, request)) + +#define amdgpu_dpm_switch_power_profile(adev, type) \ + ((adev)->powerplay.pp_funcs->switch_power_profile(\ + (adev)->powerplay.pp_handle, type)) + struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f7adbace428a..4e0f7d2d87f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -60,9 +60,12 @@ * - 3.8.0 - Add support raster config init in the kernel * - 3.9.0 - Add support for memory query info about VRAM and GTT. * - 3.10.0 - Add support for new fences ioctl, new gem ioctl flags + * - 3.11.0 - Add support for sensor query info (clocks, temp, etc). + * - 3.12.0 - Add query for double offchip LDS buffers + * - 3.13.0 - Add PRT support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 10 +#define KMS_DRIVER_MINOR 13 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -77,13 +80,13 @@ int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; int amdgpu_lockup_timeout = 0; int amdgpu_dpm = -1; -int amdgpu_smc_load_fw = 1; +int amdgpu_fw_load_type = -1; int amdgpu_aspm = -1; int amdgpu_runtime_pm = -1; unsigned amdgpu_ip_block_mask = 0xffffffff; int amdgpu_bapm = -1; int amdgpu_deep_color = 0; -int amdgpu_vm_size = 64; +int amdgpu_vm_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; @@ -100,6 +103,11 @@ unsigned amdgpu_pg_mask = 0xffffffff; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; unsigned amdgpu_pp_feature_mask = 0xffffffff; +int amdgpu_ngg = 0; +int amdgpu_prim_buf_per_se = 0; +int amdgpu_pos_buf_per_se = 0; +int amdgpu_cntl_sb_buf_per_se = 0; +int amdgpu_param_buf_per_se = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -137,8 +145,8 @@ module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(dpm, amdgpu_dpm, int, 0444); -MODULE_PARM_DESC(smc_load_fw, "SMC firmware loading(1 = enable, 0 = disable)"); -module_param_named(smc_load_fw, amdgpu_smc_load_fw, int, 0444); +MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)"); +module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444); MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)"); module_param_named(aspm, amdgpu_aspm, int, 0444); @@ -207,6 +215,22 @@ MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)"); module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); +MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))"); +module_param_named(ngg, amdgpu_ngg, int, 0444); + +MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)"); +module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); + +MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)"); +module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); + +MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)"); +module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); + +MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); +module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); + + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -409,6 +433,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67D0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, @@ -421,8 +446,16 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, - + /* Vega 10 */ + {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x6862, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x6863, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; @@ -685,7 +718,6 @@ static struct drm_driver kms_driver = { DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, - .preclose = amdgpu_driver_preclose_kms, .postclose = amdgpu_driver_postclose_kms, .lastclose = amdgpu_driver_lastclose_kms, .set_busid = drm_pci_set_busid, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 36ce3cac81ba..a48142d930c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -147,11 +147,11 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, ret = amdgpu_gem_object_create(adev, aligned_size, 0, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED, true, &gobj); if (ret) { - printk(KERN_ERR "failed to allocate framebuffer (%d)\n", - aligned_size); + pr_err("failed to allocate framebuffer (%d)\n", aligned_size); return -ENOMEM; } abo = gem_to_amdgpu_bo(gobj); @@ -224,7 +224,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(info)) { ret = PTR_ERR(info); - goto out_unref; + goto out; } info->par = rfbdev; @@ -233,7 +233,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); - goto out_destroy_fbi; + goto out; } fb = &rfbdev->rfb.base; @@ -241,8 +241,6 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup helper */ rfbdev->helper.fb = fb; - memset_io(abo->kptr, 0x0, amdgpu_bo_size(abo)); - strcpy(info->fix.id, "amdgpudrmfb"); drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth); @@ -266,7 +264,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, if (info->screen_base == NULL) { ret = -ENOSPC; - goto out_destroy_fbi; + goto out; } DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start); @@ -278,9 +276,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, vga_switcheroo_client_fb_set(adev->ddev->pdev, info); return 0; -out_destroy_fbi: - drm_fb_helper_release_fbi(helper); -out_unref: +out: if (abo) { } @@ -304,7 +300,6 @@ static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfb struct amdgpu_framebuffer *rfb = &rfbdev->rfb; drm_fb_helper_unregister_fbi(&rfbdev->helper); - drm_fb_helper_release_fbi(&rfbdev->helper); if (rfb->obj) { amdgpufb_destroy_pinned_object(rfb->obj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 964d2a946ed5..2ee327d69775 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -27,6 +27,9 @@ */ #include <drm/drmP.h> #include <drm/amdgpu_drm.h> +#ifdef CONFIG_X86 +#include <asm/set_memory.h> +#endif #include "amdgpu.h" /* @@ -229,7 +232,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, unsigned p; int i, j; u64 page_base; - uint32_t flags = AMDGPU_PTE_SYSTEM; + /* Starting from VEGA10, system bit must be 0 to mean invalid. */ + uint64_t flags = 0; if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); @@ -271,7 +275,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, */ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, - uint32_t flags) + uint64_t flags) { unsigned t; unsigned p; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 106cf83c2e6b..03a9c5cad222 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -152,6 +152,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct amdgpu_bo_va *bo_va; + struct dma_fence *fence = NULL; int r; INIT_LIST_HEAD(&list); @@ -173,6 +174,17 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, if (bo_va) { if (--bo_va->ref_count == 0) { amdgpu_vm_bo_rmv(adev, bo_va); + + r = amdgpu_vm_clear_freed(adev, vm, &fence); + if (unlikely(r)) { + dev_err(adev->dev, "failed to clear page " + "tables on GEM object close (%d)\n", r); + } + + if (fence) { + amdgpu_bo_fence(bo, fence, true); + dma_fence_put(fence); + } } } ttm_eu_backoff_reservation(&ticket, &list); @@ -507,14 +519,16 @@ static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo) * amdgpu_gem_va_update_vm -update the bo_va in its VM * * @adev: amdgpu_device pointer + * @vm: vm to update * @bo_va: bo_va to update * @list: validation list - * @operation: map or unmap + * @operation: map, unmap or clear * * Update the bo_va directly after setting its address. Errors are not * vital here, so they are not reported back to userspace. */ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va, struct list_head *list, uint32_t operation) @@ -529,20 +543,21 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; } - r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check, + r = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_va_check, NULL); if (r) goto error; - r = amdgpu_vm_update_page_directory(adev, bo_va->vm); + r = amdgpu_vm_update_directories(adev, vm); if (r) goto error; - r = amdgpu_vm_clear_freed(adev, bo_va->vm); + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) goto error; - if (operation == AMDGPU_VA_OP_MAP) + if (operation == AMDGPU_VA_OP_MAP || + operation == AMDGPU_VA_OP_REPLACE) r = amdgpu_vm_bo_update(adev, bo_va, false); error: @@ -553,6 +568,12 @@ error: int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { + const uint32_t valid_flags = AMDGPU_VM_DELAY_UPDATE | + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_MASK; + const uint32_t prt_flags = AMDGPU_VM_DELAY_UPDATE | + AMDGPU_VM_PAGE_PRT; + struct drm_amdgpu_gem_va *args = data; struct drm_gem_object *gobj; struct amdgpu_device *adev = dev->dev_private; @@ -563,7 +584,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head list; - uint32_t invalid_flags, va_flags = 0; + uint64_t va_flags; int r = 0; if (!adev->vm_manager.enabled) @@ -577,17 +598,17 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE | - AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE); - if ((args->flags & invalid_flags)) { - dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", - args->flags, invalid_flags); + if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { + dev_err(&dev->pdev->dev, "invalid flags combination 0x%08X\n", + args->flags); return -EINVAL; } switch (args->operation) { case AMDGPU_VA_OP_MAP: case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: + case AMDGPU_VA_OP_REPLACE: break; default: dev_err(&dev->pdev->dev, "unsupported operation %d\n", @@ -595,38 +616,47 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - gobj = drm_gem_object_lookup(filp, args->handle); - if (gobj == NULL) - return -ENOENT; - abo = gem_to_amdgpu_bo(gobj); INIT_LIST_HEAD(&list); - tv.bo = &abo->tbo; - tv.shared = false; - list_add(&tv.head, &list); + if ((args->operation != AMDGPU_VA_OP_CLEAR) && + !(args->flags & AMDGPU_VM_PAGE_PRT)) { + gobj = drm_gem_object_lookup(filp, args->handle); + if (gobj == NULL) + return -ENOENT; + abo = gem_to_amdgpu_bo(gobj); + tv.bo = &abo->tbo; + tv.shared = false; + list_add(&tv.head, &list); + } else { + gobj = NULL; + abo = NULL; + } amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - drm_gem_object_unreference_unlocked(gobj); - return r; - } + if (r) + goto error_unref; - bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); - if (!bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); - drm_gem_object_unreference_unlocked(gobj); - return -ENOENT; + if (abo) { + bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); + if (!bo_va) { + r = -ENOENT; + goto error_backoff; + } + } else if (args->operation != AMDGPU_VA_OP_CLEAR) { + bo_va = fpriv->prt_va; + } else { + bo_va = NULL; } switch (args->operation) { case AMDGPU_VA_OP_MAP: - if (args->flags & AMDGPU_VM_PAGE_READABLE) - va_flags |= AMDGPU_PTE_READABLE; - if (args->flags & AMDGPU_VM_PAGE_WRITEABLE) - va_flags |= AMDGPU_PTE_WRITEABLE; - if (args->flags & AMDGPU_VM_PAGE_EXECUTABLE) - va_flags |= AMDGPU_PTE_EXECUTABLE; + r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + args->map_size); + if (r) + goto error_backoff; + + va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -634,14 +664,34 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, case AMDGPU_VA_OP_UNMAP: r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address); break; + + case AMDGPU_VA_OP_CLEAR: + r = amdgpu_vm_bo_clear_mappings(adev, &fpriv->vm, + args->va_address, + args->map_size); + break; + case AMDGPU_VA_OP_REPLACE: + r = amdgpu_vm_alloc_pts(adev, bo_va->vm, args->va_address, + args->map_size); + if (r) + goto error_backoff; + + va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); + r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, + args->offset_in_bo, args->map_size, + va_flags); + break; default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && - !amdgpu_vm_debug) - amdgpu_gem_va_update_vm(adev, bo_va, &list, args->operation); + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) + amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list, + args->operation); + +error_backoff: ttm_eu_backoff_reservation(&ticket, &list); +error_unref: drm_gem_object_unreference_unlocked(gobj); return r; } @@ -667,7 +717,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, switch (args->op) { case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: { struct drm_amdgpu_gem_create_in info; - void __user *out = (void __user *)(long)args->value; + void __user *out = (void __user *)(uintptr_t)args->value; info.bo_size = robj->gem_base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; @@ -679,6 +729,11 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, break; } case AMDGPU_GEM_OP_SET_PLACEMENT: + if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) { + r = -EINVAL; + amdgpu_bo_unreserve(robj); + break; + } if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) { r = -EPERM; amdgpu_bo_unreserve(robj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index e02a70dd37b5..aab857d89d03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -161,9 +161,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } - if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); - if (vm) { r = amdgpu_vm_flush(ring, job); if (r) { @@ -172,7 +169,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } } - if (ring->funcs->emit_hdp_flush) + if (ring->funcs->init_cond_exec) + patch_offset = amdgpu_ring_init_cond_exec(ring); + + if (ring->funcs->emit_hdp_flush +#ifdef CONFIG_X86_64 + && !(adev->flags & AMD_IS_APU) +#endif + ) amdgpu_ring_emit_hdp_flush(ring); skip_preamble = ring->current_ctx == fence_ctx; @@ -202,7 +206,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = false; } - if (ring->funcs->emit_hdp_invalidate) + if (ring->funcs->emit_hdp_invalidate +#ifdef CONFIG_X86_64 + && !(adev->flags & AMD_IS_APU) +#endif + ) amdgpu_ring_emit_hdp_invalidate(ring); r = amdgpu_fence_emit(ring, f); @@ -214,6 +222,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } + if (ring->funcs->insert_end) + ring->funcs->insert_end(ring); + /* wrap the last IB with fence */ if (job && job->uf_addr) { amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index ba38ae6a1463..a3da1a122fc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -25,6 +25,48 @@ #define __AMDGPU_IH_H__ struct amdgpu_device; + /* + * vega10+ IH clients + */ +enum amdgpu_ih_clientid +{ + AMDGPU_IH_CLIENTID_IH = 0x00, + AMDGPU_IH_CLIENTID_ACP = 0x01, + AMDGPU_IH_CLIENTID_ATHUB = 0x02, + AMDGPU_IH_CLIENTID_BIF = 0x03, + AMDGPU_IH_CLIENTID_DCE = 0x04, + AMDGPU_IH_CLIENTID_ISP = 0x05, + AMDGPU_IH_CLIENTID_PCIE0 = 0x06, + AMDGPU_IH_CLIENTID_RLC = 0x07, + AMDGPU_IH_CLIENTID_SDMA0 = 0x08, + AMDGPU_IH_CLIENTID_SDMA1 = 0x09, + AMDGPU_IH_CLIENTID_SE0SH = 0x0a, + AMDGPU_IH_CLIENTID_SE1SH = 0x0b, + AMDGPU_IH_CLIENTID_SE2SH = 0x0c, + AMDGPU_IH_CLIENTID_SE3SH = 0x0d, + AMDGPU_IH_CLIENTID_SYSHUB = 0x0e, + AMDGPU_IH_CLIENTID_THM = 0x0f, + AMDGPU_IH_CLIENTID_UVD = 0x10, + AMDGPU_IH_CLIENTID_VCE0 = 0x11, + AMDGPU_IH_CLIENTID_VMC = 0x12, + AMDGPU_IH_CLIENTID_XDMA = 0x13, + AMDGPU_IH_CLIENTID_GRBM_CP = 0x14, + AMDGPU_IH_CLIENTID_ATS = 0x15, + AMDGPU_IH_CLIENTID_ROM_SMUIO = 0x16, + AMDGPU_IH_CLIENTID_DF = 0x17, + AMDGPU_IH_CLIENTID_VCE1 = 0x18, + AMDGPU_IH_CLIENTID_PWR = 0x19, + AMDGPU_IH_CLIENTID_UTCL2 = 0x1b, + AMDGPU_IH_CLIENTID_EA = 0x1c, + AMDGPU_IH_CLIENTID_UTCL2LOG = 0x1d, + AMDGPU_IH_CLIENTID_MP0 = 0x1e, + AMDGPU_IH_CLIENTID_MP1 = 0x1f, + + AMDGPU_IH_CLIENTID_MAX + +}; + +#define AMDGPU_IH_CLIENTID_LEGACY 0 /* * R6xx+ IH ring @@ -46,12 +88,19 @@ struct amdgpu_ih_ring { dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ }; +#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 + struct amdgpu_iv_entry { + unsigned client_id; unsigned src_id; - unsigned src_data; unsigned ring_id; unsigned vm_id; + unsigned vm_id_src; + uint64_t timestamp; + unsigned timestamp_src; unsigned pas_id; + unsigned pasid_src; + unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW]; const uint32_t *iv_entry; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index e63ece049b05..a6b7e367a860 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -33,6 +33,7 @@ #include "amdgpu_ih.h" #include "atom.h" #include "amdgpu_connectors.h" +#include "amdgpu_trace.h" #include <linux/pm_runtime.h> @@ -89,23 +90,28 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) static void amdgpu_irq_disable_all(struct amdgpu_device *adev) { unsigned long irqflags; - unsigned i, j; + unsigned i, j, k; int r; spin_lock_irqsave(&adev->irq.lock, irqflags); - for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) { - struct amdgpu_irq_src *src = adev->irq.sources[i]; - - if (!src || !src->funcs->set || !src->num_types) + for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + if (!adev->irq.client[i].sources) continue; - for (j = 0; j < src->num_types; ++j) { - atomic_set(&src->enabled_types[j], 0); - r = src->funcs->set(adev, src, j, - AMDGPU_IRQ_STATE_DISABLE); - if (r) - DRM_ERROR("error disabling interrupt (%d)\n", - r); + for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) { + struct amdgpu_irq_src *src = adev->irq.client[i].sources[j]; + + if (!src || !src->funcs->set || !src->num_types) + continue; + + for (k = 0; k < src->num_types; ++k) { + atomic_set(&src->enabled_types[k], 0); + r = src->funcs->set(adev, src, k, + AMDGPU_IRQ_STATE_DISABLE); + if (r) + DRM_ERROR("error disabling interrupt (%d)\n", + r); + } } } spin_unlock_irqrestore(&adev->irq.lock, irqflags); @@ -254,7 +260,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) */ void amdgpu_irq_fini(struct amdgpu_device *adev) { - unsigned i; + unsigned i, j; drm_vblank_cleanup(adev->ddev); if (adev->irq.installed) { @@ -266,19 +272,25 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) cancel_work_sync(&adev->reset_work); } - for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) { - struct amdgpu_irq_src *src = adev->irq.sources[i]; - - if (!src) + for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + if (!adev->irq.client[i].sources) continue; - kfree(src->enabled_types); - src->enabled_types = NULL; - if (src->data) { - kfree(src->data); - kfree(src); - adev->irq.sources[i] = NULL; + for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) { + struct amdgpu_irq_src *src = adev->irq.client[i].sources[j]; + + if (!src) + continue; + + kfree(src->enabled_types); + src->enabled_types = NULL; + if (src->data) { + kfree(src->data); + kfree(src); + adev->irq.client[i].sources[j] = NULL; + } } + kfree(adev->irq.client[i].sources); } } @@ -290,18 +302,31 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) * @source: irq source * */ -int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id, +int amdgpu_irq_add_id(struct amdgpu_device *adev, + unsigned client_id, unsigned src_id, struct amdgpu_irq_src *source) { - if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) + if (client_id >= AMDGPU_IH_CLIENTID_MAX) return -EINVAL; - if (adev->irq.sources[src_id] != NULL) + if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) return -EINVAL; if (!source->funcs) return -EINVAL; + if (!adev->irq.client[client_id].sources) { + adev->irq.client[client_id].sources = + kcalloc(AMDGPU_MAX_IRQ_SRC_ID, + sizeof(struct amdgpu_irq_src *), + GFP_KERNEL); + if (!adev->irq.client[client_id].sources) + return -ENOMEM; + } + + if (adev->irq.client[client_id].sources[src_id] != NULL) + return -EINVAL; + if (source->num_types && !source->enabled_types) { atomic_t *types; @@ -313,8 +338,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id, source->enabled_types = types; } - adev->irq.sources[src_id] = source; - + adev->irq.client[client_id].sources[src_id] = source; return 0; } @@ -329,10 +353,18 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id, void amdgpu_irq_dispatch(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { + unsigned client_id = entry->client_id; unsigned src_id = entry->src_id; struct amdgpu_irq_src *src; int r; + trace_amdgpu_iv(entry); + + if (client_id >= AMDGPU_IH_CLIENTID_MAX) { + DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); + return; + } + if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); return; @@ -341,7 +373,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, if (adev->irq.virq[src_id]) { generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id)); } else { - src = adev->irq.sources[src_id]; + if (!adev->irq.client[client_id].sources) { + DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", + client_id, src_id); + return; + } + + src = adev->irq.client[client_id].sources[src_id]; if (!src) { DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); return; @@ -385,13 +423,20 @@ int amdgpu_irq_update(struct amdgpu_device *adev, void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { - int i, j; - for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) { - struct amdgpu_irq_src *src = adev->irq.sources[i]; - if (!src) + int i, j, k; + + for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + if (!adev->irq.client[i].sources) continue; - for (j = 0; j < src->num_types; j++) - amdgpu_irq_update(adev, src, j); + + for (j = 0; j < AMDGPU_MAX_IRQ_SRC_ID; ++j) { + struct amdgpu_irq_src *src = adev->irq.client[i].sources[j]; + + if (!src) + continue; + for (k = 0; k < src->num_types; k++) + amdgpu_irq_update(adev, src, k); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 1642f4108297..0610cc4a9788 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -28,6 +28,7 @@ #include "amdgpu_ih.h" #define AMDGPU_MAX_IRQ_SRC_ID 0x100 +#define AMDGPU_MAX_IRQ_CLIENT_ID 0x100 struct amdgpu_device; struct amdgpu_iv_entry; @@ -44,6 +45,10 @@ struct amdgpu_irq_src { void *data; }; +struct amdgpu_irq_client { + struct amdgpu_irq_src **sources; +}; + /* provided by interrupt generating IP blocks */ struct amdgpu_irq_src_funcs { int (*set)(struct amdgpu_device *adev, struct amdgpu_irq_src *source, @@ -58,7 +63,7 @@ struct amdgpu_irq { bool installed; spinlock_t lock; /* interrupt sources */ - struct amdgpu_irq_src *sources[AMDGPU_MAX_IRQ_SRC_ID]; + struct amdgpu_irq_client client[AMDGPU_IH_CLIENTID_MAX]; /* status, etc. */ bool msi_enabled; /* msi enabled */ @@ -80,7 +85,8 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg); int amdgpu_irq_init(struct amdgpu_device *adev); void amdgpu_irq_fini(struct amdgpu_device *adev); -int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned src_id, +int amdgpu_irq_add_id(struct amdgpu_device *adev, + unsigned client_id, unsigned src_id, struct amdgpu_irq_src *source); void amdgpu_irq_dispatch(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 61d94c745672..832be632478f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -36,12 +36,6 @@ #include <linux/pm_runtime.h> #include "amdgpu_amdkfd.h" -#if defined(CONFIG_VGA_SWITCHEROO) -bool amdgpu_has_atpx(void); -#else -static inline bool amdgpu_has_atpx(void) { return false; } -#endif - /** * amdgpu_driver_unload_kms - Main unload function for KMS. * @@ -103,7 +97,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) amdgpu_has_atpx() && (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && - ((flags & AMD_IS_APU) == 0)) + ((flags & AMD_IS_APU) == 0) && + !pci_is_thunderbolt_attached(dev->pdev)) flags |= AMD_IS_PX; /* amdgpu_device_init should report only fatal error @@ -208,6 +203,14 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->sdma.instance[query_fw->index].fw_version; fw_info->feature = adev->sdma.instance[query_fw->index].feature_version; break; + case AMDGPU_INFO_FW_SOS: + fw_info->ver = adev->psp.sos_fw_version; + fw_info->feature = adev->psp.sos_feature_version; + break; + case AMDGPU_INFO_FW_ASD: + fw_info->ver = adev->psp.asd_fw_version; + fw_info->feature = adev->psp.asd_feature_version; + break; default: return -EINVAL; } @@ -234,12 +237,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct amdgpu_device *adev = dev->dev_private; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; - void __user *out = (void __user *)(long)info->return_pointer; + void __user *out = (void __user *)(uintptr_t)info->return_pointer; uint32_t size = info->return_size; struct drm_crtc *crtc; uint32_t ui32 = 0; uint64_t ui64 = 0; int i, found; + int ui32_size = sizeof(ui32); if (!info->return_size || !info->return_pointer) return -EINVAL; @@ -308,6 +312,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 1; break; + case AMDGPU_HW_IP_UVD_ENC: + type = AMD_IP_BLOCK_TYPE_UVD; + for (i = 0; i < adev->uvd.num_enc_rings; i++) + ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i); + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 1; + break; default: return -EINVAL; } @@ -347,6 +358,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; break; + case AMDGPU_HW_IP_UVD_ENC: + type = AMD_IP_BLOCK_TYPE_UVD; + break; default: return -EINVAL; } @@ -527,6 +541,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.vram_type = adev->mc.vram_type; dev_info.vram_bit_width = adev->mc.vram_width; dev_info.vce_harvest_config = adev->vce.harvest_config; + dev_info.gc_double_offchip_lds_buf = + adev->gfx.config.double_offchip_lds_buf; + + if (amdgpu_ngg) { + dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; + dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; + dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; + dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; + } return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; @@ -596,6 +619,80 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -EINVAL; } } + case AMDGPU_INFO_SENSOR: { + struct pp_gpu_power query = {0}; + int query_size = sizeof(query); + + if (amdgpu_dpm == 0) + return -ENOENT; + + switch (info->sensor_info.type) { + case AMDGPU_INFO_SENSOR_GFX_SCLK: + /* get sclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GFX_SCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; + case AMDGPU_INFO_SENSOR_GFX_MCLK: + /* get mclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GFX_MCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; + case AMDGPU_INFO_SENSOR_GPU_TEMP: + /* get temperature in millidegrees C */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_TEMP, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + case AMDGPU_INFO_SENSOR_GPU_LOAD: + /* get GPU load */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_LOAD, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + case AMDGPU_INFO_SENSOR_GPU_AVG_POWER: + /* get average GPU power */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_POWER, + (void *)&query, &query_size)) { + return -EINVAL; + } + ui32 = query.average_gpu_power >> 8; + break; + case AMDGPU_INFO_SENSOR_VDDNB: + /* get VDDNB in millivolts */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_VDDNB, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + case AMDGPU_INFO_SENSOR_VDDGFX: + /* get VDDGFX in millivolts */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_VDDGFX, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + break; + default: + DRM_DEBUG_KMS("Invalid request %d\n", + info->sensor_info.type); + return -EINVAL; + } + return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -655,6 +752,14 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } + fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); + if (!fpriv->prt_va) { + r = -ENOMEM; + amdgpu_vm_fini(adev, &fpriv->vm); + kfree(fpriv); + goto out_suspend; + } + if (amdgpu_sriov_vf(adev)) { r = amdgpu_map_static_csa(adev, &fpriv->vm); if (r) @@ -694,11 +799,15 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (!fpriv) return; + pm_runtime_get_sync(dev->dev); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_uvd_free_handles(adev, file_priv); amdgpu_vce_free_handles(adev, file_priv); + amdgpu_vm_bo_rmv(adev, fpriv->prt_va); + if (amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false)); @@ -722,21 +831,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pm_runtime_put_autosuspend(dev->dev); } -/** - * amdgpu_driver_preclose_kms - drm callback for pre close - * - * @dev: drm dev pointer - * @file_priv: drm file - * - * On device pre close, tear down hyperz and cmask filps on r1xx-r5xx - * (all asics). - */ -void amdgpu_driver_preclose_kms(struct drm_device *dev, - struct drm_file *file_priv) -{ - pm_runtime_get_sync(dev->dev); -} - /* * VBlank related functions. */ @@ -989,6 +1083,23 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) fw_info.feature, fw_info.ver); } + /* PSP SOS */ + query_fw.fw_type = AMDGPU_INFO_FW_SOS; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "SOS feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + + /* PSP ASD */ + query_fw.fw_type = AMDGPU_INFO_FW_ASD; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + /* SMC */ query_fw.fw_type = AMDGPU_INFO_FW_SMC; ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 7ea3cacf9f9f..38f739fb727b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -31,6 +31,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include <linux/mmu_notifier.h> +#include <linux/interval_tree.h> #include <drm/drmP.h> #include <drm/drm.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index c12497bd3889..db8f8dda209c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -590,11 +590,13 @@ int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tile /* amdgpu_display.c */ void amdgpu_print_display_setup(struct drm_device *dev); int amdgpu_modeset_create_props(struct amdgpu_device *adev); -int amdgpu_crtc_set_config(struct drm_mode_set *set); +int amdgpu_crtc_set_config(struct drm_mode_set *set, + struct drm_modeset_acquire_ctx *ctx); int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, uint32_t target); + uint32_t page_flip_flags, uint32_t target, + struct drm_modeset_acquire_ctx *ctx); void amdgpu_crtc_cleanup_flip_ctx(struct amdgpu_flip_work *work, struct amdgpu_bo *new_abo); int amdgpu_crtc_prepare_flip(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index be80a4a68d7b..cb89fff863c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, if (domain & AMDGPU_GEM_DOMAIN_VRAM) { unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT; - unsigned lpfn = 0; - - /* This forces a reallocation if the flag wasn't set before */ - if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) - lpfn = adev->mc.real_vram_size >> PAGE_SHIFT; places[c].fpfn = 0; - places[c].lpfn = lpfn; + places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) places[c].lpfn = visible_pfn; else places[c].flags |= TTM_PL_FLAG_TOPDOWN; + + if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) + places[c].flags |= TTM_PL_FLAG_CONTIGUOUS; c++; } @@ -395,32 +394,18 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, amdgpu_fill_placement_to_bo(bo, placement); /* Kernel allocation are uninterruptible */ - if (!resv) { - bool locked; - - reservation_object_init(&bo->tbo.ttm_resv); - locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock); - WARN_ON(!locked); - } - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); - r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type, - &bo->placement, page_align, !kernel, NULL, - acc_size, sg, resv ? resv : &bo->tbo.ttm_resv, - &amdgpu_ttm_bo_destroy); + r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, + &bo->placement, page_align, !kernel, NULL, + acc_size, sg, resv, &amdgpu_ttm_bo_destroy); amdgpu_cs_report_moved_bytes(adev, atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved); - if (unlikely(r != 0)) { - if (!resv) - ww_mutex_unlock(&bo->tbo.resv->lock); + if (unlikely(r != 0)) return r; - } - bo->tbo.priority = ilog2(bo->tbo.num_pages); if (kernel) - bo->tbo.priority *= 2; - bo->tbo.priority = min(bo->tbo.priority, (unsigned)(TTM_MAX_BO_PRIORITY - 1)); + bo->tbo.priority = 1; if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { @@ -436,7 +421,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, dma_fence_put(fence); } if (!resv) - ww_mutex_unlock(&bo->tbo.resv->lock); + amdgpu_bo_unreserve(bo); *bo_ptr = bo; trace_amdgpu_bo_create(bo); @@ -665,6 +650,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (WARN_ON_ONCE(min_offset > max_offset)) return -EINVAL; + /* A shared bo cannot be migrated to VRAM */ + if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM)) + return -EINVAL; + if (bo->pin_count) { uint32_t mem_type = bo->tbo.mem.mem_type; @@ -827,7 +816,10 @@ int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) { - if (AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6) + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + + if (adev->family <= AMDGPU_FAMILY_CZ && + AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6) return -EINVAL; bo->tiling_flags = tiling_flags; @@ -939,8 +931,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) size = bo->mem.num_pages << PAGE_SHIFT; offset = bo->mem.start << PAGE_SHIFT; /* TODO: figure out how to map scattered VRAM to the CPU */ - if ((offset + size) <= adev->mc.visible_vram_size && - (abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) + if ((offset + size) <= adev->mc.visible_vram_size) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -948,7 +939,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) return -EINVAL; /* hurrah the memory is not visible ! */ - abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; for (i = 0; i < abo->placement.num_placement; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 346e80a7119b..990fde2cf4fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -43,16 +43,22 @@ static const struct cg_flag_name clocks[] = { {AMD_CG_SUPPORT_GFX_CGTS_LS, "Graphics Coarse Grain Tree Shader Light Sleep"}, {AMD_CG_SUPPORT_GFX_CP_LS, "Graphics Command Processor Light Sleep"}, {AMD_CG_SUPPORT_GFX_RLC_LS, "Graphics Run List Controller Light Sleep"}, + {AMD_CG_SUPPORT_GFX_3D_CGCG, "Graphics 3D Coarse Grain Clock Gating"}, + {AMD_CG_SUPPORT_GFX_3D_CGLS, "Graphics 3D Coarse Grain memory Light Sleep"}, {AMD_CG_SUPPORT_MC_LS, "Memory Controller Light Sleep"}, {AMD_CG_SUPPORT_MC_MGCG, "Memory Controller Medium Grain Clock Gating"}, {AMD_CG_SUPPORT_SDMA_LS, "System Direct Memory Access Light Sleep"}, {AMD_CG_SUPPORT_SDMA_MGCG, "System Direct Memory Access Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_BIF_MGCG, "Bus Interface Medium Grain Clock Gating"}, {AMD_CG_SUPPORT_BIF_LS, "Bus Interface Light Sleep"}, {AMD_CG_SUPPORT_UVD_MGCG, "Unified Video Decoder Medium Grain Clock Gating"}, {AMD_CG_SUPPORT_VCE_MGCG, "Video Compression Engine Medium Grain Clock Gating"}, {AMD_CG_SUPPORT_HDP_LS, "Host Data Path Light Sleep"}, {AMD_CG_SUPPORT_HDP_MGCG, "Host Data Path Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_DRM_MGCG, "Digital Right Management Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_DRM_LS, "Digital Right Management Light Sleep"}, {AMD_CG_SUPPORT_ROM_MGCG, "Rom Medium Grain Clock Gating"}, + {AMD_CG_SUPPORT_DF_MGCG, "Data Fabric Medium Grain Clock Gating"}, {0, NULL}, }; @@ -610,6 +616,174 @@ fail: return count; } +static ssize_t amdgpu_get_pp_power_profile(struct device *dev, + char *buf, struct amd_pp_profile *query) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret = 0; + + if (adev->pp_enabled) + ret = amdgpu_dpm_get_power_profile_state( + adev, query); + else if (adev->pm.funcs->get_power_profile_state) + ret = adev->pm.funcs->get_power_profile_state( + adev, query); + + if (ret) + return ret; + + return snprintf(buf, PAGE_SIZE, + "%d %d %d %d %d\n", + query->min_sclk / 100, + query->min_mclk / 100, + query->activity_threshold, + query->up_hyst, + query->down_hyst); +} + +static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_pp_profile query = {0}; + + query.type = AMD_PP_GFX_PROFILE; + + return amdgpu_get_pp_power_profile(dev, buf, &query); +} + +static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_pp_profile query = {0}; + + query.type = AMD_PP_COMPUTE_PROFILE; + + return amdgpu_get_pp_power_profile(dev, buf, &query); +} + +static ssize_t amdgpu_set_pp_power_profile(struct device *dev, + const char *buf, + size_t count, + struct amd_pp_profile *request) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t loop = 0; + char *sub_str, buf_cpy[128], *tmp_str; + const char delimiter[3] = {' ', '\n', '\0'}; + long int value; + int ret = 0; + + if (strncmp("reset", buf, strlen("reset")) == 0) { + if (adev->pp_enabled) + ret = amdgpu_dpm_reset_power_profile_state( + adev, request); + else if (adev->pm.funcs->reset_power_profile_state) + ret = adev->pm.funcs->reset_power_profile_state( + adev, request); + if (ret) { + count = -EINVAL; + goto fail; + } + return count; + } + + if (strncmp("set", buf, strlen("set")) == 0) { + if (adev->pp_enabled) + ret = amdgpu_dpm_set_power_profile_state( + adev, request); + else if (adev->pm.funcs->set_power_profile_state) + ret = adev->pm.funcs->set_power_profile_state( + adev, request); + if (ret) { + count = -EINVAL; + goto fail; + } + return count; + } + + if (count + 1 >= 128) { + count = -EINVAL; + goto fail; + } + + memcpy(buf_cpy, buf, count + 1); + tmp_str = buf_cpy; + + while (tmp_str[0]) { + sub_str = strsep(&tmp_str, delimiter); + ret = kstrtol(sub_str, 0, &value); + if (ret) { + count = -EINVAL; + goto fail; + } + + switch (loop) { + case 0: + /* input unit MHz convert to dpm table unit 10KHz*/ + request->min_sclk = (uint32_t)value * 100; + break; + case 1: + /* input unit MHz convert to dpm table unit 10KHz*/ + request->min_mclk = (uint32_t)value * 100; + break; + case 2: + request->activity_threshold = (uint16_t)value; + break; + case 3: + request->up_hyst = (uint8_t)value; + break; + case 4: + request->down_hyst = (uint8_t)value; + break; + default: + break; + } + + loop++; + } + + if (adev->pp_enabled) + ret = amdgpu_dpm_set_power_profile_state( + adev, request); + else if (adev->pm.funcs->set_power_profile_state) + ret = adev->pm.funcs->set_power_profile_state( + adev, request); + + if (ret) + count = -EINVAL; + +fail: + return count; +} + +static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct amd_pp_profile request = {0}; + + request.type = AMD_PP_GFX_PROFILE; + + return amdgpu_set_pp_power_profile(dev, buf, count, &request); +} + +static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct amd_pp_profile request = {0}; + + request.type = AMD_PP_COMPUTE_PROFILE; + + return amdgpu_set_pp_power_profile(dev, buf, count, &request); +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, @@ -637,6 +811,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR, static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR, amdgpu_get_pp_mclk_od, amdgpu_set_pp_mclk_od); +static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR, + amdgpu_get_pp_gfx_power_profile, + amdgpu_set_pp_gfx_power_profile); +static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR, + amdgpu_get_pp_compute_power_profile, + amdgpu_set_pp_compute_power_profile); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -1142,11 +1322,11 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) /* XXX select vce level based on ring/task */ adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; mutex_unlock(&adev->pm.mutex); - amdgpu_pm_compute_clocks(adev); - amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_UNGATE); + amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, + AMD_PG_STATE_UNGATE); + amdgpu_pm_compute_clocks(adev); } else { amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, AMD_PG_STATE_GATE); @@ -1255,6 +1435,20 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file pp_mclk_od\n"); return ret; } + ret = device_create_file(adev->dev, + &dev_attr_pp_gfx_power_profile); + if (ret) { + DRM_ERROR("failed to create device file " + "pp_gfx_power_profile\n"); + return ret; + } + ret = device_create_file(adev->dev, + &dev_attr_pp_compute_power_profile); + if (ret) { + DRM_ERROR("failed to create device file " + "pp_compute_power_profile\n"); + return ret; + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { @@ -1284,6 +1478,10 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); device_remove_file(adev->dev, &dev_attr_pp_sclk_od); device_remove_file(adev->dev, &dev_attr_pp_mclk_od); + device_remove_file(adev->dev, + &dev_attr_pp_gfx_power_profile); + device_remove_file(adev->dev, + &dev_attr_pp_compute_power_profile); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -1340,7 +1538,9 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { - int32_t value; + uint32_t value; + struct pp_gpu_power query = {0}; + int size; /* sanity check PP is enabled */ if (!(adev->powerplay.pp_funcs && @@ -1348,47 +1548,60 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a return -EINVAL; /* GPU Clocks */ + size = sizeof(value); seq_printf(m, "GFX Clocks and Power:\n"); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (MCLK)\n", value/100); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (SCLK)\n", value/100); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDGFX)\n", value); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDNB)\n", value); + size = sizeof(query); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) { + seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8, + query.vddc_power & 0xff); + seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8, + query.vddci_power & 0xff); + seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8, + query.max_gpu_power & 0xff); + seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8, + query.average_gpu_power & 0xff); + } + size = sizeof(value); seq_printf(m, "\n"); /* GPU Temp */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&value, &size)) seq_printf(m, "GPU Temperature: %u C\n", value/1000); /* GPU Load */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size)) seq_printf(m, "GPU Load: %u %%\n", value); seq_printf(m, "\n"); /* UVD clocks */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, &value)) { + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { if (!value) { seq_printf(m, "UVD: Disabled\n"); } else { seq_printf(m, "UVD: Enabled\n"); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_DCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (DCLK)\n", value/100); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_VCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (VCLK)\n", value/100); } } seq_printf(m, "\n"); /* VCE clocks */ - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, &value)) { + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_POWER, (void *)&value, &size)) { if (!value) { seq_printf(m, "VCE: Disabled\n"); } else { seq_printf(m, "VCE: Enabled\n"); - if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, &value)) + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCE_ECCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (ECCLK)\n", value/100); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 8856eccc37fa..f5ae871aa11c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -43,7 +43,7 @@ static int amdgpu_create_pp_handle(struct amdgpu_device *adev) amd_pp = &(adev->powerplay); pp_init.chip_family = adev->family; pp_init.chip_id = adev->asic_type; - pp_init.pm_en = amdgpu_dpm != 0 ? true : false; + pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false; pp_init.feature_mask = amdgpu_pp_feature_mask; pp_init.device = amdgpu_cgs_create_device(adev); ret = amd_powerplay_create(&pp_init, &(amd_pp->pp_handle)); @@ -71,6 +71,7 @@ static int amdgpu_pp_early_init(void *handle) case CHIP_TOPAZ: case CHIP_CARRIZO: case CHIP_STONEY: + case CHIP_VEGA10: adev->pp_enabled = true; if (amdgpu_create_pp_handle(adev)) return -EINVAL; @@ -163,7 +164,7 @@ static int amdgpu_pp_hw_init(void *handle) int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->pp_enabled && adev->firmware.smu_load) + if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) amdgpu_ucode_init_bo(adev); if (adev->powerplay.ip_funcs->hw_init) @@ -190,7 +191,7 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); - if (adev->pp_enabled && adev->firmware.smu_load) + if (adev->pp_enabled && adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) amdgpu_ucode_fini_bo(adev); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c new file mode 100644 index 000000000000..ed6e5799016e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -0,0 +1,481 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#include <linux/firmware.h> +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v3_1.h" + +static void psp_set_funcs(struct amdgpu_device *adev); + +static int psp_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + psp_set_funcs(adev); + + return 0; +} + +static int psp_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + int ret; + + switch (adev->asic_type) { + case CHIP_VEGA10: + psp->init_microcode = psp_v3_1_init_microcode; + psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv; + psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos; + psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; + psp->ring_init = psp_v3_1_ring_init; + psp->cmd_submit = psp_v3_1_cmd_submit; + psp->compare_sram_data = psp_v3_1_compare_sram_data; + psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; + break; + default: + return -EINVAL; + } + + psp->adev = adev; + + ret = psp_init_microcode(psp); + if (ret) { + DRM_ERROR("Failed to load psp firmware!\n"); + return ret; + } + + return 0; +} + +static int psp_sw_fini(void *handle) +{ + return 0; +} + +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, + uint32_t reg_val, uint32_t mask, bool check_changed) +{ + uint32_t val; + int i; + struct amdgpu_device *adev = psp->adev; + + val = RREG32(reg_index); + + for (i = 0; i < adev->usec_timeout; i++) { + if (check_changed) { + if (val != reg_val) + return 0; + } else { + if ((val & mask) == reg_val) + return 0; + } + udelay(1); + } + + return -ETIME; +} + +static int +psp_cmd_submit_buf(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr, + int index) +{ + int ret; + struct amdgpu_bo *cmd_buf_bo; + uint64_t cmd_buf_mc_addr; + struct psp_gfx_cmd_resp *cmd_buf_mem; + struct amdgpu_device *adev = psp->adev; + + ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &cmd_buf_bo, &cmd_buf_mc_addr, + (void **)&cmd_buf_mem); + if (ret) + return ret; + + memset(cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); + + memcpy(cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); + + ret = psp_cmd_submit(psp, ucode, cmd_buf_mc_addr, + fence_mc_addr, index); + + while (*((unsigned int *)psp->fence_buf) != index) { + msleep(1); + } + + amdgpu_bo_free_kernel(&cmd_buf_bo, + &cmd_buf_mc_addr, + (void **)&cmd_buf_mem); + + return ret; +} + +static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t tmr_mc, uint32_t size) +{ + cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; + cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = (uint32_t)tmr_mc; + cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = (uint32_t)(tmr_mc >> 32); + cmd->cmd.cmd_setup_tmr.buf_size = size; +} + +/* Set up Trusted Memory Region */ +static int psp_tmr_init(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + /* + * Allocate 3M memory aligned to 1M from Frame Buffer (local + * physical). + * + * Note: this memory need be reserved till the driver + * uninitializes. + */ + ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + if (ret) + goto failed; + + psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr, 1); + if (ret) + goto failed_mem; + + kfree(cmd); + + return 0; + +failed_mem: + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); +failed: + kfree(cmd); + return ret; +} + +static void psp_prep_asd_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t asd_mc, uint64_t asd_mc_shared, + uint32_t size, uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_ASD; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(asd_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(asd_mc); + cmd->cmd.cmd_load_ta.app_len = size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(asd_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(asd_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_asd_load(struct psp_context *psp) +{ + int ret; + struct amdgpu_bo *asd_bo, *asd_shared_bo; + uint64_t asd_mc_addr, asd_shared_mc_addr; + void *asd_buf, *asd_shared_buf; + struct psp_gfx_cmd_resp *cmd; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for shared ASD <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_SHARED_MEM_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &asd_shared_bo, &asd_shared_mc_addr, &asd_buf); + if (ret) + goto failed; + + /* + * Allocate 256k memory aligned to 4k from Frame Buffer (local + * physical) for ASD firmware + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_ASD_BIN_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &asd_bo, &asd_mc_addr, &asd_buf); + if (ret) + goto failed_mem; + + memcpy(asd_buf, psp->asd_start_addr, psp->asd_ucode_size); + + psp_prep_asd_cmd_buf(cmd, asd_mc_addr, asd_shared_mc_addr, + psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr, 2); + if (ret) + goto failed_mem1; + + amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); + amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); + kfree(cmd); + + return 0; + +failed_mem1: + amdgpu_bo_free_kernel(&asd_bo, &asd_mc_addr, &asd_buf); +failed_mem: + amdgpu_bo_free_kernel(&asd_shared_bo, &asd_shared_mc_addr, &asd_shared_buf); +failed: + kfree(cmd); + return ret; +} + +static int psp_load_fw(struct amdgpu_device *adev) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + int i; + struct amdgpu_firmware_info *ucode; + struct psp_context *psp = &adev->psp; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + ret = psp_bootloader_load_sysdrv(psp); + if (ret) + goto failed; + + ret = psp_bootloader_load_sos(psp); + if (ret) + goto failed; + + ret = psp_ring_init(psp, PSP_RING_TYPE__KM); + if (ret) + goto failed; + + ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fence_buf_bo, + &psp->fence_buf_mc_addr, + &psp->fence_buf); + if (ret) + goto failed; + + memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); + + ret = psp_tmr_init(psp); + if (ret) + goto failed_mem; + + ret = psp_asd_load(psp); + if (ret) + goto failed_mem; + + for (i = 0; i < adev->firmware.max_ucodes; i++) { + ucode = &adev->firmware.ucode[i]; + if (!ucode->fw) + continue; + + if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && + psp_smu_reload_quirk(psp)) + continue; + + ret = psp_prep_cmd_buf(ucode, cmd); + if (ret) + goto failed_mem; + + ret = psp_cmd_submit_buf(psp, ucode, cmd, + psp->fence_buf_mc_addr, i + 3); + if (ret) + goto failed_mem; + +#if 0 + /* check if firmware loaded sucessfully */ + if (!amdgpu_psp_check_fw_loading_status(adev, i)) + return -EINVAL; +#endif + } + + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + kfree(cmd); + + return 0; + +failed_mem: + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); +failed: + kfree(cmd); + return ret; +} + +static int psp_hw_init(void *handle) +{ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + + mutex_lock(&adev->firmware.mutex); + /* + * This sequence is just used on hw_init only once, no need on + * resume. + */ + ret = amdgpu_ucode_init_bo(adev); + if (ret) + goto failed; + + ret = psp_load_fw(adev); + if (ret) { + DRM_ERROR("PSP firmware loading failed\n"); + goto failed; + } + + mutex_unlock(&adev->firmware.mutex); + return 0; + +failed: + adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; + mutex_unlock(&adev->firmware.mutex); + return -EINVAL; +} + +static int psp_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) + amdgpu_ucode_fini_bo(adev); + + if (psp->tmr_buf) + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + + return 0; +} + +static int psp_suspend(void *handle) +{ + return 0; +} + +static int psp_resume(void *handle) +{ + int ret; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + + mutex_lock(&adev->firmware.mutex); + + ret = psp_load_fw(adev); + if (ret) + DRM_ERROR("PSP resume failed\n"); + + mutex_unlock(&adev->firmware.mutex); + + return ret; +} + +static bool psp_check_fw_loading_status(struct amdgpu_device *adev, + enum AMDGPU_UCODE_ID ucode_type) +{ + struct amdgpu_firmware_info *ucode = NULL; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + DRM_INFO("firmware is not loaded by PSP\n"); + return true; + } + + if (!adev->firmware.fw_size) + return false; + + ucode = &adev->firmware.ucode[ucode_type]; + if (!ucode->fw || !ucode->ucode_size) + return false; + + return psp_compare_sram_data(&adev->psp, ucode, ucode_type); +} + +static int psp_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int psp_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs psp_ip_funcs = { + .name = "psp", + .early_init = psp_early_init, + .late_init = NULL, + .sw_init = psp_sw_init, + .sw_fini = psp_sw_fini, + .hw_init = psp_hw_init, + .hw_fini = psp_hw_fini, + .suspend = psp_suspend, + .resume = psp_resume, + .is_idle = NULL, + .wait_for_idle = NULL, + .soft_reset = NULL, + .set_clockgating_state = psp_set_clockgating_state, + .set_powergating_state = psp_set_powergating_state, +}; + +static const struct amdgpu_psp_funcs psp_funcs = { + .check_fw_loading_status = psp_check_fw_loading_status, +}; + +static void psp_set_funcs(struct amdgpu_device *adev) +{ + if (NULL == adev->firmware.funcs) + adev->firmware.funcs = &psp_funcs; +} + +const struct amdgpu_ip_block_version psp_v3_1_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 3, + .minor = 1, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h new file mode 100644 index 000000000000..e9f35e025b59 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -0,0 +1,127 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ +#ifndef __AMDGPU_PSP_H__ +#define __AMDGPU_PSP_H__ + +#include "amdgpu.h" +#include "psp_gfx_if.h" + +#define PSP_FENCE_BUFFER_SIZE 0x1000 +#define PSP_CMD_BUFFER_SIZE 0x1000 +#define PSP_ASD_BIN_SIZE 0x40000 +#define PSP_ASD_SHARED_MEM_SIZE 0x4000 + +enum psp_ring_type +{ + PSP_RING_TYPE__INVALID = 0, + /* + * These values map to the way the PSP kernel identifies the + * rings. + */ + PSP_RING_TYPE__UM = 1, /* User mode ring (formerly called RBI) */ + PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */ +}; + +struct psp_ring +{ + enum psp_ring_type ring_type; + struct psp_gfx_rb_frame *ring_mem; + uint64_t ring_mem_mc_addr; + void *ring_mem_handle; + uint32_t ring_size; +}; + +struct psp_context +{ + struct amdgpu_device *adev; + struct psp_ring km_ring; + + int (*init_microcode)(struct psp_context *psp); + int (*bootloader_load_sysdrv)(struct psp_context *psp); + int (*bootloader_load_sos)(struct psp_context *psp); + int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd); + int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); + int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); + bool (*compare_sram_data)(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type); + bool (*smu_reload_quirk)(struct psp_context *psp); + + /* sos firmware */ + const struct firmware *sos_fw; + uint32_t sos_fw_version; + uint32_t sos_feature_version; + uint32_t sys_bin_size; + uint32_t sos_bin_size; + uint8_t *sys_start_addr; + uint8_t *sos_start_addr; + + /* tmr buffer */ + struct amdgpu_bo *tmr_bo; + uint64_t tmr_mc_addr; + void *tmr_buf; + + /* asd firmware */ + const struct firmware *asd_fw; + uint32_t asd_fw_version; + uint32_t asd_feature_version; + uint32_t asd_ucode_size; + uint8_t *asd_start_addr; + + /* fence buffer */ + struct amdgpu_bo *fence_buf_bo; + uint64_t fence_buf_mc_addr; + void *fence_buf; +}; + +struct amdgpu_psp_funcs { + bool (*check_fw_loading_status)(struct amdgpu_device *adev, + enum AMDGPU_UCODE_ID); +}; + +#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) +#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) +#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ + (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) +#define psp_compare_sram_data(psp, ucode, type) \ + (psp)->compare_sram_data((psp), (ucode), (type)) +#define psp_init_microcode(psp) \ + ((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0) +#define psp_bootloader_load_sysdrv(psp) \ + ((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0) +#define psp_bootloader_load_sos(psp) \ + ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) +#define psp_smu_reload_quirk(psp) \ + ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) + +extern const struct amd_ip_funcs psp_ip_funcs; + +extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; +extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, + uint32_t field_val, uint32_t mask, bool check_changed); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7c842b7f1004..6a85db0c0bc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -182,16 +182,32 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - r = amdgpu_wb_get(adev, &ring->rptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); - return r; - } + if (ring->funcs->support_64bit_ptrs) { + r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } + + r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } + + } else { + r = amdgpu_wb_get(adev, &ring->rptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring rptr_offs wb alloc failed\n", r); + return r; + } + + r = amdgpu_wb_get(adev, &ring->wptr_offs); + if (r) { + dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); + return r; + } - r = amdgpu_wb_get(adev, &ring->wptr_offs); - if (r) { - dev_err(adev->dev, "(%d) ring wptr_offs wb alloc failed\n", r); - return r; } r = amdgpu_wb_get(adev, &ring->fence_offs); @@ -219,6 +235,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->ring_size = roundup_pow_of_two(max_dw * 4 * amdgpu_sched_hw_submission); + ring->buf_mask = (ring->ring_size / 4) - 1; + ring->ptr_mask = ring->funcs->support_64bit_ptrs ? + 0xffffffffffffffff : ring->buf_mask; /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, @@ -230,9 +249,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, dev_err(adev->dev, "(%d) ring create failed\n", r); return r; } - memset((void *)ring->ring, 0, ring->ring_size); + amdgpu_ring_clear_ring(ring); } - ring->ptr_mask = (ring->ring_size / 4) - 1; + ring->max_dw = max_dw; if (amdgpu_debugfs_ring_init(adev, ring)) { @@ -253,10 +272,18 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) { ring->ready = false; - amdgpu_wb_free(ring->adev, ring->cond_exe_offs); - amdgpu_wb_free(ring->adev, ring->fence_offs); - amdgpu_wb_free(ring->adev, ring->rptr_offs); - amdgpu_wb_free(ring->adev, ring->wptr_offs); + if (ring->funcs->support_64bit_ptrs) { + amdgpu_wb_free_64bit(ring->adev, ring->cond_exe_offs); + amdgpu_wb_free_64bit(ring->adev, ring->fence_offs); + amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs); + amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs); + } else { + amdgpu_wb_free(ring->adev, ring->cond_exe_offs); + amdgpu_wb_free(ring->adev, ring->fence_offs); + amdgpu_wb_free(ring->adev, ring->rptr_offs); + amdgpu_wb_free(ring->adev, ring->wptr_offs); + } + amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, @@ -293,8 +320,8 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, if (*pos < 12) { early[0] = amdgpu_ring_get_rptr(ring); - early[1] = amdgpu_ring_get_wptr(ring); - early[2] = ring->wptr; + early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; + early[2] = ring->wptr & ring->buf_mask; for (i = *pos / 4; i < 3 && size; i++) { r = put_user(early[i], (uint32_t *)buf); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 2345b39878c6..63e56398ca9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -27,10 +27,11 @@ #include "gpu_scheduler.h" /* max number of rings */ -#define AMDGPU_MAX_RINGS 16 +#define AMDGPU_MAX_RINGS 18 #define AMDGPU_MAX_GFX_RINGS 1 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 +#define AMDGPU_MAX_UVD_ENC_RINGS 2 /* some special values for the owner field */ #define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) @@ -45,7 +46,8 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_SDMA, AMDGPU_RING_TYPE_UVD, AMDGPU_RING_TYPE_VCE, - AMDGPU_RING_TYPE_KIQ + AMDGPU_RING_TYPE_KIQ, + AMDGPU_RING_TYPE_UVD_ENC }; struct amdgpu_device; @@ -96,10 +98,11 @@ struct amdgpu_ring_funcs { enum amdgpu_ring_type type; uint32_t align_mask; u32 nop; + bool support_64bit_ptrs; /* ring read/write ptr handling */ - u32 (*get_rptr)(struct amdgpu_ring *ring); - u32 (*get_wptr)(struct amdgpu_ring *ring); + u64 (*get_rptr)(struct amdgpu_ring *ring); + u64 (*get_wptr)(struct amdgpu_ring *ring); void (*set_wptr)(struct amdgpu_ring *ring); /* validating and patching of IBs */ int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); @@ -126,6 +129,7 @@ struct amdgpu_ring_funcs { int (*test_ib)(struct amdgpu_ring *ring, long timeout); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); + void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); unsigned (*init_cond_exec)(struct amdgpu_ring *ring); @@ -148,19 +152,23 @@ struct amdgpu_ring { struct amdgpu_bo *ring_obj; volatile uint32_t *ring; unsigned rptr_offs; - unsigned wptr; - unsigned wptr_old; + u64 wptr; + u64 wptr_old; unsigned ring_size; unsigned max_dw; int count_dw; uint64_t gpu_addr; - uint32_t ptr_mask; + uint64_t ptr_mask; + uint32_t buf_mask; bool ready; u32 idx; u32 me; u32 pipe; u32 queue; struct amdgpu_bo *mqd_obj; + uint64_t mqd_gpu_addr; + void *mqd_ptr; + uint64_t eop_gpu_addr; u32 doorbell_index; bool use_doorbell; unsigned wptr_offs; @@ -184,5 +192,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); +static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) +{ + int i = 0; + while (i <= ring->buf_mask) + ring->ring[i++] = ring->funcs->nop; + +} #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index e05a24325eeb..15510dadde01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -228,7 +228,7 @@ out_unref: out_cleanup: kfree(gtt_obj); if (r) { - printk(KERN_WARNING "Error while testing BO move.\n"); + pr_warn("Error while testing BO move\n"); } } @@ -237,82 +237,3 @@ void amdgpu_test_moves(struct amdgpu_device *adev) if (adev->mman.buffer_funcs) amdgpu_do_test_moves(adev); } - -void amdgpu_test_ring_sync(struct amdgpu_device *adev, - struct amdgpu_ring *ringA, - struct amdgpu_ring *ringB) -{ -} - -static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, - struct amdgpu_ring *ringA, - struct amdgpu_ring *ringB, - struct amdgpu_ring *ringC) -{ -} - -static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA, - struct amdgpu_ring *ringB) -{ - if (ringA == &ringA->adev->vce.ring[0] && - ringB == &ringB->adev->vce.ring[1]) - return false; - - return true; -} - -void amdgpu_test_syncing(struct amdgpu_device *adev) -{ - int i, j, k; - - for (i = 1; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ringA = adev->rings[i]; - if (!ringA || !ringA->ready) - continue; - - for (j = 0; j < i; ++j) { - struct amdgpu_ring *ringB = adev->rings[j]; - if (!ringB || !ringB->ready) - continue; - - if (!amdgpu_test_sync_possible(ringA, ringB)) - continue; - - DRM_INFO("Testing syncing between rings %d and %d...\n", i, j); - amdgpu_test_ring_sync(adev, ringA, ringB); - - DRM_INFO("Testing syncing between rings %d and %d...\n", j, i); - amdgpu_test_ring_sync(adev, ringB, ringA); - - for (k = 0; k < j; ++k) { - struct amdgpu_ring *ringC = adev->rings[k]; - if (!ringC || !ringC->ready) - continue; - - if (!amdgpu_test_sync_possible(ringA, ringC)) - continue; - - if (!amdgpu_test_sync_possible(ringB, ringC)) - continue; - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k); - amdgpu_test_ring_sync2(adev, ringA, ringB, ringC); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, k, j); - amdgpu_test_ring_sync2(adev, ringA, ringC, ringB); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, i, k); - amdgpu_test_ring_sync2(adev, ringB, ringA, ringC); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", j, k, i); - amdgpu_test_ring_sync2(adev, ringB, ringC, ringA); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, i, j); - amdgpu_test_ring_sync2(adev, ringC, ringA, ringB); - - DRM_INFO("Testing syncing between rings %d, %d and %d...\n", k, j, i); - amdgpu_test_ring_sync2(adev, ringC, ringB, ringA); - } - } - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index a18ae1e97860..ee9d0f346d75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -11,6 +11,9 @@ #define TRACE_SYSTEM amdgpu #define TRACE_INCLUDE_FILE amdgpu_trace +#define AMDGPU_JOB_GET_TIMELINE_NAME(job) \ + job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) + TRACE_EVENT(amdgpu_mm_rreg, TP_PROTO(unsigned did, uint32_t reg, uint32_t value), TP_ARGS(did, reg, value), @@ -49,6 +52,43 @@ TRACE_EVENT(amdgpu_mm_wreg, (unsigned long)__entry->value) ); +TRACE_EVENT(amdgpu_iv, + TP_PROTO(struct amdgpu_iv_entry *iv), + TP_ARGS(iv), + TP_STRUCT__entry( + __field(unsigned, client_id) + __field(unsigned, src_id) + __field(unsigned, ring_id) + __field(unsigned, vm_id) + __field(unsigned, vm_id_src) + __field(uint64_t, timestamp) + __field(unsigned, timestamp_src) + __field(unsigned, pas_id) + __array(unsigned, src_data, 4) + ), + TP_fast_assign( + __entry->client_id = iv->client_id; + __entry->src_id = iv->src_id; + __entry->ring_id = iv->ring_id; + __entry->vm_id = iv->vm_id; + __entry->vm_id_src = iv->vm_id_src; + __entry->timestamp = iv->timestamp; + __entry->timestamp_src = iv->timestamp_src; + __entry->pas_id = iv->pas_id; + __entry->src_data[0] = iv->src_data[0]; + __entry->src_data[1] = iv->src_data[1]; + __entry->src_data[2] = iv->src_data[2]; + __entry->src_data[3] = iv->src_data[3]; + ), + TP_printk("client_id:%u src_id:%u ring:%u vm_id:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", + __entry->client_id, __entry->src_id, + __entry->ring_id, __entry->vm_id, + __entry->timestamp, __entry->pas_id, + __entry->src_data[0], __entry->src_data[1], + __entry->src_data[2], __entry->src_data[3]) +); + + TRACE_EVENT(amdgpu_bo_create, TP_PROTO(struct amdgpu_bo *bo), TP_ARGS(bo), @@ -70,7 +110,7 @@ TRACE_EVENT(amdgpu_bo_create, __entry->visible = bo->flags; ), - TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d", + TP_printk("bo=%p, pages=%u, type=%d, prefered=%d, allowed=%d, visible=%d", __entry->bo, __entry->pages, __entry->type, __entry->prefer, __entry->allow, __entry->visible) ); @@ -101,50 +141,51 @@ TRACE_EVENT(amdgpu_cs_ioctl, TP_PROTO(struct amdgpu_job *job), TP_ARGS(job), TP_STRUCT__entry( - __field(struct amdgpu_device *, adev) - __field(struct amd_sched_job *, sched_job) - __field(struct amdgpu_ib *, ib) + __field(uint64_t, sched_job_id) + __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) + __field(unsigned int, context) + __field(unsigned int, seqno) __field(struct dma_fence *, fence) __field(char *, ring_name) __field(u32, num_ibs) ), TP_fast_assign( - __entry->adev = job->adev; - __entry->sched_job = &job->base; - __entry->ib = job->ibs; - __entry->fence = &job->base.s_fence->finished; + __entry->sched_job_id = job->base.id; + __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) + __entry->context = job->base.s_fence->finished.context; + __entry->seqno = job->base.s_fence->finished.seqno; __entry->ring_name = job->ring->name; __entry->num_ibs = job->num_ibs; ), - TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", - __entry->adev, __entry->sched_job, __entry->ib, - __entry->fence, __entry->ring_name, __entry->num_ibs) + TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", + __entry->sched_job_id, __get_str(timeline), __entry->context, + __entry->seqno, __entry->ring_name, __entry->num_ibs) ); TRACE_EVENT(amdgpu_sched_run_job, TP_PROTO(struct amdgpu_job *job), TP_ARGS(job), TP_STRUCT__entry( - __field(struct amdgpu_device *, adev) - __field(struct amd_sched_job *, sched_job) - __field(struct amdgpu_ib *, ib) - __field(struct dma_fence *, fence) + __field(uint64_t, sched_job_id) + __string(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) + __field(unsigned int, context) + __field(unsigned int, seqno) __field(char *, ring_name) __field(u32, num_ibs) ), TP_fast_assign( - __entry->adev = job->adev; - __entry->sched_job = &job->base; - __entry->ib = job->ibs; - __entry->fence = &job->base.s_fence->finished; + __entry->sched_job_id = job->base.id; + __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job)) + __entry->context = job->base.s_fence->finished.context; + __entry->seqno = job->base.s_fence->finished.seqno; __entry->ring_name = job->ring->name; __entry->num_ibs = job->num_ibs; ), - TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", - __entry->adev, __entry->sched_job, __entry->ib, - __entry->fence, __entry->ring_name, __entry->num_ibs) + TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", + __entry->sched_job_id, __get_str(timeline), __entry->context, + __entry->seqno, __entry->ring_name, __entry->num_ibs) ); @@ -184,9 +225,9 @@ TRACE_EVENT(amdgpu_vm_bo_map, ), TP_fast_assign( - __entry->bo = bo_va->bo; - __entry->start = mapping->it.start; - __entry->last = mapping->it.last; + __entry->bo = bo_va ? bo_va->bo : NULL; + __entry->start = mapping->start; + __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), @@ -209,8 +250,8 @@ TRACE_EVENT(amdgpu_vm_bo_unmap, TP_fast_assign( __entry->bo = bo_va->bo; - __entry->start = mapping->it.start; - __entry->last = mapping->it.last; + __entry->start = mapping->start; + __entry->last = mapping->last; __entry->offset = mapping->offset; __entry->flags = mapping->flags; ), @@ -229,8 +270,8 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping, ), TP_fast_assign( - __entry->soffset = mapping->it.start; - __entry->eoffset = mapping->it.last + 1; + __entry->soffset = mapping->start; + __entry->eoffset = mapping->last + 1; __entry->flags = mapping->flags; ), TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", @@ -321,7 +362,7 @@ TRACE_EVENT(amdgpu_bo_list_set, __entry->bo = bo; __entry->bo_size = amdgpu_bo_size(bo); ), - TP_printk("list=%p, bo=%p, bo_size = %Ld", + TP_printk("list=%p, bo=%p, bo_size=%Ld", __entry->list, __entry->bo, __entry->bo_size) @@ -339,7 +380,7 @@ TRACE_EVENT(amdgpu_cs_bo_status, __entry->total_bo = total_bo; __entry->total_size = total_size; ), - TP_printk("total bo size = %Ld, total bo count = %Ld", + TP_printk("total_bo_size=%Ld, total_bo_count=%Ld", __entry->total_bo, __entry->total_size) ); @@ -359,11 +400,12 @@ TRACE_EVENT(amdgpu_ttm_bo_move, __entry->new_placement = new_placement; __entry->old_placement = old_placement; ), - TP_printk("bo=%p from:%d to %d with size = %Ld", + TP_printk("bo=%p, from=%d, to=%d, size=%Ld", __entry->bo, __entry->old_placement, __entry->new_placement, __entry->bo_size) ); +#undef AMDGPU_JOB_GET_TIMELINE_NAME #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4c6094eefc51..35d53a0d9ba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -529,40 +529,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ case TTM_PL_TT: break; case TTM_PL_VRAM: - if (mem->start == AMDGPU_BO_INVALID_OFFSET) - return -EINVAL; - mem->bus.offset = mem->start << PAGE_SHIFT; /* check if it's visible */ if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size) return -EINVAL; mem->bus.base = adev->mc.aper_base; mem->bus.is_iomem = true; -#ifdef __alpha__ - /* - * Alpha: use bus.addr to hold the ioremap() return, - * so we can modify bus.base below. - */ - if (mem->placement & TTM_PL_FLAG_WC) - mem->bus.addr = - ioremap_wc(mem->bus.base + mem->bus.offset, - mem->bus.size); - else - mem->bus.addr = - ioremap_nocache(mem->bus.base + mem->bus.offset, - mem->bus.size); - if (!mem->bus.addr) - return -ENOMEM; - - /* - * Alpha: Use just the bus offset plus - * the hose/domain memory base for bus.base. - * It then can be used to build PTEs for VRAM - * access, as done in ttm_bo_vm_fault(). - */ - mem->bus.base = (mem->bus.base & 0x0ffffffffUL) + - adev->ddev->hose->dense_mem_base; -#endif break; default: return -EINVAL; @@ -574,6 +546,18 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re { } +static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, + unsigned long page_offset) +{ + struct drm_mm_node *mm = bo->mem.mm_node; + uint64_t size = mm->size; + uint64_t offset = page_offset; + + page_offset = do_div(offset, size); + mm += offset; + return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset; +} + /* * TTM backend functions. */ @@ -746,7 +730,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) { struct ttm_tt *ttm = bo->ttm; struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - uint32_t flags; + uint64_t flags; int r; if (!ttm || amdgpu_ttm_is_bound(ttm)) @@ -1027,10 +1011,10 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); } -uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, +uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem) { - uint32_t flags = 0; + uint64_t flags = 0; if (mem && mem->mem_type != TTM_PL_SYSTEM) flags |= AMDGPU_PTE_VALID; @@ -1042,9 +1026,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, flags |= AMDGPU_PTE_SNOOPED; } - if (adev->asic_type >= CHIP_TONGA) - flags |= AMDGPU_PTE_EXECUTABLE; - + flags |= adev->gart.gart_pte_flags; flags |= AMDGPU_PTE_READABLE; if (!amdgpu_ttm_tt_is_readonly(ttm)) @@ -1091,6 +1073,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, + .io_mem_pfn = amdgpu_ttm_io_mem_pfn, }; int amdgpu_ttm_init(struct amdgpu_device *adev) @@ -1160,27 +1143,33 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT; adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT; /* GDS Memory */ - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing GDS heap.\n"); - return r; + if (adev->gds.mem.total_size) { + r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, + adev->gds.mem.total_size >> PAGE_SHIFT); + if (r) { + DRM_ERROR("Failed initializing GDS heap.\n"); + return r; + } } /* GWS */ - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing gws heap.\n"); - return r; + if (adev->gds.gws.total_size) { + r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, + adev->gds.gws.total_size >> PAGE_SHIFT); + if (r) { + DRM_ERROR("Failed initializing gws heap.\n"); + return r; + } } /* OA */ - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing oa heap.\n"); - return r; + if (adev->gds.oa.total_size) { + r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, + adev->gds.oa.total_size >> PAGE_SHIFT); + if (r) { + DRM_ERROR("Failed initializing oa heap.\n"); + return r; + } } r = amdgpu_ttm_debugfs_init(adev); @@ -1208,9 +1197,12 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) } ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); + if (adev->gds.mem.total_size) + ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); + if (adev->gds.gws.total_size) + ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); + if (adev->gds.oa.total_size) + ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); ttm_bo_device_release(&adev->mman.bdev); amdgpu_gart_fini(adev); amdgpu_ttm_global_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 0f0b38191fac..a1891c93cdbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -217,10 +217,55 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, return true; } -static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode, - uint64_t mc_addr, void *kptr) +enum amdgpu_firmware_load_type +amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) +{ + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + return AMDGPU_FW_LOAD_DIRECT; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_HAWAII: + case CHIP_MULLINS: + return AMDGPU_FW_LOAD_DIRECT; +#endif + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_CARRIZO: + case CHIP_STONEY: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + if (!load_type) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_SMU; + case CHIP_VEGA10: + if (!load_type) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_PSP; + default: + DRM_ERROR("Unknow firmware load type\n"); + } + + return AMDGPU_FW_LOAD_DIRECT; +} + +static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, + struct amdgpu_firmware_info *ucode, + uint64_t mc_addr, void *kptr) { const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr = NULL; if (NULL == ucode->fw) return 0; @@ -232,9 +277,36 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_firmware_info *ucode, return 0; header = (const struct common_firmware_header *)ucode->fw->data; - memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + - le32_to_cpu(header->ucode_array_offset_bytes)), - le32_to_cpu(header->ucode_size_bytes)); + + cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP || + (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT && + ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) { + ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes)), + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1 || + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2) { + ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) - + le32_to_cpu(cp_hdr->jt_size) * 4; + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes)), + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) { + ucode->ucode_size = le32_to_cpu(cp_hdr->jt_size) * 4; + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes) + + le32_to_cpu(cp_hdr->jt_offset) * 4), + ucode->ucode_size); + } return 0; } @@ -260,10 +332,11 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, (le32_to_cpu(header->jt_offset) * 4); memcpy(dst_addr, src_addr, le32_to_cpu(header->jt_size) * 4); + ucode->ucode_size += le32_to_cpu(header->jt_size) * 4; + return 0; } - int amdgpu_ucode_init_bo(struct amdgpu_device *adev) { struct amdgpu_bo **bo = &adev->firmware.fw_buf; @@ -303,20 +376,32 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) amdgpu_bo_unreserve(*bo); - for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) { + memset(fw_buf_ptr, 0, adev->firmware.fw_size); + + /* + * if SMU loaded firmware, it needn't add SMC, UVD, and VCE + * ucode info here + */ + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM - 4; + else + adev->firmware.max_ucodes = AMDGPU_UCODE_ID_MAXIMUM; + + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { header = (const struct common_firmware_header *)ucode->fw->data; - amdgpu_ucode_init_single_fw(ucode, fw_mc_addr + fw_offset, - fw_buf_ptr + fw_offset); - if (i == AMDGPU_UCODE_ID_CP_MEC1) { + amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset, + (void *)((uint8_t *)fw_buf_ptr + fw_offset)); + if (i == AMDGPU_UCODE_ID_CP_MEC1 && + adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { const struct gfx_firmware_header_v1_0 *cp_hdr; cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset, fw_buf_ptr + fw_offset); fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); } - fw_offset += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE); } } return 0; @@ -328,7 +413,8 @@ failed_pin: failed_reserve: amdgpu_bo_unref(bo); failed: - adev->firmware.smu_load = false; + if (err) + adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; return err; } @@ -338,7 +424,7 @@ int amdgpu_ucode_fini_bo(struct amdgpu_device *adev) int i; struct amdgpu_firmware_info *ucode = NULL; - for (i = 0; i < AMDGPU_UCODE_ID_MAXIMUM; i++) { + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { ucode->mc_addr = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index a8a4230729f9..758f03a1770d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -50,6 +50,14 @@ struct smc_firmware_header_v1_0 { }; /* version_major=1, version_minor=0 */ +struct psp_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t ucode_feature_version; + uint32_t sos_offset_bytes; + uint32_t sos_size_bytes; +}; + +/* version_major=1, version_minor=0 */ struct gfx_firmware_header_v1_0 { struct common_firmware_header header; uint32_t ucode_feature_version; @@ -110,6 +118,7 @@ union amdgpu_firmware_header { struct common_firmware_header common; struct mc_firmware_header_v1_0 mc; struct smc_firmware_header_v1_0 smc; + struct psp_firmware_header_v1_0 psp; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; @@ -128,9 +137,14 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_PFP, AMDGPU_UCODE_ID_CP_ME, AMDGPU_UCODE_ID_CP_MEC1, + AMDGPU_UCODE_ID_CP_MEC1_JT, AMDGPU_UCODE_ID_CP_MEC2, + AMDGPU_UCODE_ID_CP_MEC2_JT, AMDGPU_UCODE_ID_RLC_G, AMDGPU_UCODE_ID_STORAGE, + AMDGPU_UCODE_ID_SMC, + AMDGPU_UCODE_ID_UVD, + AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_MAXIMUM, }; @@ -161,6 +175,8 @@ struct amdgpu_firmware_info { uint64_t mc_addr; /* kernel linear address */ void *kaddr; + /* ucode_size_bytes */ + uint32_t ucode_size; }; void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); @@ -174,4 +190,7 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, int amdgpu_ucode_init_bo(struct amdgpu_device *adev); int amdgpu_ucode_fini_bo(struct amdgpu_device *adev); +enum amdgpu_firmware_load_type +amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6d6ab7f11b4c..2ca09f111f08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -67,6 +67,14 @@ #define FIRMWARE_POLARIS11 "amdgpu/polaris11_uvd.bin" #define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin" +#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin" + +#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00) +#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00) +#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00) +#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00) +#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00) + /** * amdgpu_uvd_cs_ctx - Command submission parser context * @@ -101,6 +109,8 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGA10); + static void amdgpu_uvd_idle_work_handler(struct work_struct *work); int amdgpu_uvd_sw_init(struct amdgpu_device *adev) @@ -151,6 +161,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) case CHIP_POLARIS11: fw_name = FIRMWARE_POLARIS11; break; + case CHIP_VEGA10: + fw_name = FIRMWARE_VEGA10; + break; case CHIP_POLARIS12: fw_name = FIRMWARE_POLARIS12; break; @@ -203,9 +216,11 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", version_major, version_minor); - bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) - + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo, &adev->uvd.gpu_addr, &adev->uvd.cpu_addr); @@ -319,11 +334,13 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) unsigned offset; hdr = (const struct common_firmware_header *)adev->uvd.fw->data; - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } memset_io(ptr, 0, size); } @@ -724,10 +741,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) start = amdgpu_bo_gpu_offset(bo); - end = (mapping->it.last + 1 - mapping->it.start); + end = (mapping->last + 1 - mapping->start); end = end * AMDGPU_GPU_PAGE_SIZE + start; - addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; + addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; start += addr; amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0, @@ -936,6 +953,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, struct dma_fence *f = NULL; struct amdgpu_device *adev = ring->adev; uint64_t addr; + uint32_t data[4]; int i, r; memset(&tv, 0, sizeof(tv)); @@ -961,16 +979,28 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err; + if (adev->asic_type >= CHIP_VEGA10) { + data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0); + data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0); + data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0); + data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0); + } else { + data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); + data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0); + data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0); + data[3] = PACKET0(mmUVD_NO_OP, 0); + } + ib = &job->ibs[0]; addr = amdgpu_bo_gpu_offset(bo); - ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); + ib->ptr[0] = data[0]; ib->ptr[1] = addr; - ib->ptr[2] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0); + ib->ptr[2] = data[1]; ib->ptr[3] = addr >> 32; - ib->ptr[4] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0); + ib->ptr[4] = data[2]; ib->ptr[5] = 0; for (i = 6; i < 16; i += 2) { - ib->ptr[i] = PACKET0(mmUVD_NO_OP, 0); + ib->ptr[i] = data[3]; ib->ptr[i+1] = 0; } ib->length_dw = 16; @@ -1108,6 +1138,9 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, uvd.idle_work.work); unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); + if (amdgpu_sriov_vf(adev)) + return; + if (fences == 0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); @@ -1129,6 +1162,9 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); + if (amdgpu_sriov_vf(adev)) + return; + if (set_clocks) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index c10682baccae..3553b92bf69a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -24,6 +24,35 @@ #ifndef __AMDGPU_UVD_H__ #define __AMDGPU_UVD_H__ +#define AMDGPU_DEFAULT_UVD_HANDLES 10 +#define AMDGPU_MAX_UVD_HANDLES 40 +#define AMDGPU_UVD_STACK_SIZE (200*1024) +#define AMDGPU_UVD_HEAP_SIZE (256*1024) +#define AMDGPU_UVD_SESSION_SIZE (50*1024) +#define AMDGPU_UVD_FIRMWARE_OFFSET 256 + +struct amdgpu_uvd { + struct amdgpu_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + unsigned fw_version; + void *saved_bo; + unsigned max_handles; + atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; + struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; + struct delayed_work idle_work; + const struct firmware *fw; /* UVD firmware */ + struct amdgpu_ring ring; + struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS]; + struct amdgpu_irq_src irq; + bool address_64_bit; + bool use_ctx_buf; + struct amd_sched_entity entity; + struct amd_sched_entity entity_enc; + uint32_t srbm_soft_reset; + unsigned num_enc_rings; +}; + int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index e2c06780ce49..c853400805d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -54,6 +54,8 @@ #define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin" #define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin" +#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin" + #ifdef CONFIG_DRM_AMDGPU_CIK MODULE_FIRMWARE(FIRMWARE_BONAIRE); MODULE_FIRMWARE(FIRMWARE_KABINI); @@ -69,6 +71,8 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS10); MODULE_FIRMWARE(FIRMWARE_POLARIS11); MODULE_FIRMWARE(FIRMWARE_POLARIS12); +MODULE_FIRMWARE(FIRMWARE_VEGA10); + static void amdgpu_vce_idle_work_handler(struct work_struct *work); /** @@ -123,6 +127,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) case CHIP_POLARIS11: fw_name = FIRMWARE_POLARIS11; break; + case CHIP_VEGA10: + fw_name = FIRMWARE_VEGA10; + break; case CHIP_POLARIS12: fw_name = FIRMWARE_POLARIS12; break; @@ -313,6 +320,9 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, vce.idle_work.work); unsigned i, count = 0; + if (amdgpu_sriov_vf(adev)) + return; + for (i = 0; i < adev->vce.num_rings; i++) count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); @@ -343,6 +353,9 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool set_clocks; + if (amdgpu_sriov_vf(adev)) + return; + mutex_lock(&adev->vce.idle_mutex); set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work); if (set_clocks) { @@ -582,13 +595,13 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, } if ((addr + (uint64_t)size) > - ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { + (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", addr, lo, hi); return -EINVAL; } - addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; + addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; addr += amdgpu_bo_gpu_offset(bo); addr -= ((uint64_t)size) * ((uint64_t)index); @@ -944,6 +957,10 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; + /* TODO: remove it if VCE can work for sriov */ + if (amdgpu_sriov_vf(adev)) + return 0; + r = amdgpu_ring_alloc(ring, 16); if (r) { DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", @@ -982,6 +999,10 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct dma_fence *fence = NULL; long r; + /* TODO: remove it if VCE can work for sriov */ + if (amdgpu_sriov_vf(ring->adev)) + return 0; + /* skip vce ring1/2 ib test for now, since it's not reliable */ if (ring != &ring->adev->vce.ring[0]) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index d98041f7508d..0a7f18c461e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -24,6 +24,31 @@ #ifndef __AMDGPU_VCE_H__ #define __AMDGPU_VCE_H__ +#define AMDGPU_MAX_VCE_HANDLES 16 +#define AMDGPU_VCE_FIRMWARE_OFFSET 256 + +#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0) +#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1) + +struct amdgpu_vce { + struct amdgpu_bo *vcpu_bo; + uint64_t gpu_addr; + unsigned fw_version; + unsigned fb_version; + atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; + struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; + uint32_t img_size[AMDGPU_MAX_VCE_HANDLES]; + struct delayed_work idle_work; + struct mutex idle_mutex; + const struct firmware *fw; /* VCE firmware */ + struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; + struct amdgpu_irq_src irq; + unsigned harvest_config; + struct amd_sched_entity entity; + uint32_t srbm_soft_reset; + unsigned num_rings; +}; + int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index dcfb7df3caf4..ba8b8ae6234f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -75,6 +75,15 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) return -ENOMEM; } + r = amdgpu_vm_alloc_pts(adev, bo_va->vm, AMDGPU_CSA_VADDR, + AMDGPU_CSA_SIZE); + if (r) { + DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); + amdgpu_vm_bo_rmv(adev, bo_va); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + r = amdgpu_vm_bo_map(adev, bo_va, AMDGPU_CSA_VADDR, 0,AMDGPU_CSA_SIZE, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); @@ -97,7 +106,8 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; - mutex_init(&adev->virt.lock); + mutex_init(&adev->virt.lock_kiq); + mutex_init(&adev->virt.lock_reset); } uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) @@ -110,14 +120,12 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&adev->virt.lock); + mutex_lock(&adev->virt.lock_kiq); amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_hdp_flush(ring); amdgpu_ring_emit_rreg(ring, reg); - amdgpu_ring_emit_hdp_invalidate(ring); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - mutex_unlock(&adev->virt.lock); + mutex_unlock(&adev->virt.lock_kiq); r = dma_fence_wait(f, false); if (r) @@ -138,14 +146,12 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&adev->virt.lock); + mutex_lock(&adev->virt.lock_kiq); amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_hdp_flush(ring); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_ring_emit_hdp_invalidate(ring); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - mutex_unlock(&adev->virt.lock); + mutex_unlock(&adev->virt.lock_kiq); r = dma_fence_wait(f, false); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 675e12c42532..1ee0a190b33b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -30,6 +30,12 @@ #define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */ #define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */ +struct amdgpu_mm_table { + struct amdgpu_bo *bo; + uint32_t *cpu_addr; + uint64_t gpu_addr; +}; + /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -46,10 +52,12 @@ struct amdgpu_virt { uint64_t csa_vmid0_addr; bool chained_ib_support; uint32_t reg_val_offs; - struct mutex lock; + struct mutex lock_kiq; + struct mutex lock_reset; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; - struct delayed_work flr_work; + struct work_struct flr_work; + struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; }; @@ -89,5 +97,6 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bd0d33125c18..7ed5302b511a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -26,6 +26,7 @@ * Jerome Glisse */ #include <linux/dma-fence-array.h> +#include <linux/interval_tree_generic.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -51,12 +52,23 @@ * SI supports 16. */ +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, + START, LAST, static, amdgpu_vm_it) + +#undef START +#undef LAST + /* Local structure. Encapsulate some VM table update parameters to reduce * the number of function parameters */ struct amdgpu_pte_update_params { /* amdgpu device we do this update for */ struct amdgpu_device *adev; + /* optional amdgpu_vm we do this update for */ + struct amdgpu_vm *vm; /* address where to copy page table entries from */ uint64_t src; /* indirect buffer to fill with commands */ @@ -64,33 +76,50 @@ struct amdgpu_pte_update_params { /* Function which actually does the update */ void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, - uint32_t flags); + uint64_t flags); /* indicate update pt or its shadow */ bool shadow; }; +/* Helper to disable partial resident texture feature from a fence callback */ +struct amdgpu_prt_cb { + struct amdgpu_device *adev; + struct dma_fence_cb cb; +}; + /** - * amdgpu_vm_num_pde - return the number of page directory entries + * amdgpu_vm_num_entries - return the number of entries in a PD/PT * * @adev: amdgpu_device pointer * - * Calculate the number of page directory entries. + * Calculate the number of entries in a page directory or page table. */ -static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev) +static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, + unsigned level) { - return adev->vm_manager.max_pfn >> amdgpu_vm_block_size; + if (level == 0) + /* For the root directory */ + return adev->vm_manager.max_pfn >> + (adev->vm_manager.block_size * + adev->vm_manager.num_level); + else if (level == adev->vm_manager.num_level) + /* For the page tables on the leaves */ + return AMDGPU_VM_PTE_COUNT(adev); + else + /* Everything in between */ + return 1 << adev->vm_manager.block_size; } /** - * amdgpu_vm_directory_size - returns the size of the page directory in bytes + * amdgpu_vm_bo_size - returns the size of the BOs in bytes * * @adev: amdgpu_device pointer * - * Calculate the size of the page directory in bytes. + * Calculate the size of the BO for a page directory or page table in bytes. */ -static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev) +static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) { - return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_pdes(adev) * 8); + return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); } /** @@ -107,15 +136,56 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry) { - entry->robj = vm->page_directory; + entry->robj = vm->root.bo; entry->priority = 0; - entry->tv.bo = &vm->page_directory->tbo; + entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } /** + * amdgpu_vm_validate_layer - validate a single page table level + * + * @parent: parent page table level + * @validate: callback to do the validation + * @param: parameter for the validation callback + * + * Validate the page table BOs on command submission if neccessary. + */ +static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, + int (*validate)(void *, struct amdgpu_bo *), + void *param) +{ + unsigned i; + int r; + + if (!parent->entries) + return 0; + + for (i = 0; i <= parent->last_entry_used; ++i) { + struct amdgpu_vm_pt *entry = &parent->entries[i]; + + if (!entry->bo) + continue; + + r = validate(param, entry->bo); + if (r) + return r; + + /* + * Recurse into the sub directory. This is harmless because we + * have only a maximum of 5 layers. + */ + r = amdgpu_vm_validate_level(entry, validate, param); + if (r) + return r; + } + + return r; +} + +/** * amdgpu_vm_validate_pt_bos - validate the page table BOs * * @adev: amdgpu device pointer @@ -130,8 +200,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param) { uint64_t num_evictions; - unsigned i; - int r; /* We only need to validate the page tables * if they aren't already valid. @@ -140,19 +208,33 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (num_evictions == vm->last_eviction_counter) return 0; - /* add the vm page table to the list */ - for (i = 0; i <= vm->max_pde_used; ++i) { - struct amdgpu_bo *bo = vm->page_tables[i].bo; + return amdgpu_vm_validate_level(&vm->root, validate, param); +} + +/** + * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail + * + * @adev: amdgpu device instance + * @vm: vm providing the BOs + * + * Move the PT BOs to the tail of the LRU. + */ +static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) +{ + unsigned i; + + if (!parent->entries) + return; + + for (i = 0; i <= parent->last_entry_used; ++i) { + struct amdgpu_vm_pt *entry = &parent->entries[i]; - if (!bo) + if (!entry->bo) continue; - r = validate(param, bo); - if (r) - return r; + ttm_bo_move_to_lru_tail(&entry->bo->tbo); + amdgpu_vm_move_level_in_lru(entry); } - - return 0; } /** @@ -167,25 +249,146 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct ttm_bo_global *glob = adev->mman.bdev.glob; - unsigned i; spin_lock(&glob->lru_lock); - for (i = 0; i <= vm->max_pde_used; ++i) { - struct amdgpu_bo *bo = vm->page_tables[i].bo; + amdgpu_vm_move_level_in_lru(&vm->root); + spin_unlock(&glob->lru_lock); +} - if (!bo) - continue; + /** + * amdgpu_vm_alloc_levels - allocate the PD/PT levels + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @saddr: start of the address range + * @eaddr: end of the address range + * + * Make sure the page directories and page tables are allocated + */ +static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + uint64_t saddr, uint64_t eaddr, + unsigned level) +{ + unsigned shift = (adev->vm_manager.num_level - level) * + adev->vm_manager.block_size; + unsigned pt_idx, from, to; + int r; + + if (!parent->entries) { + unsigned num_entries = amdgpu_vm_num_entries(adev, level); - ttm_bo_move_to_lru_tail(&bo->tbo); + parent->entries = drm_calloc_large(num_entries, + sizeof(struct amdgpu_vm_pt)); + if (!parent->entries) + return -ENOMEM; + memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); } - spin_unlock(&glob->lru_lock); + + from = saddr >> shift; + to = eaddr >> shift; + if (from >= amdgpu_vm_num_entries(adev, level) || + to >= amdgpu_vm_num_entries(adev, level)) + return -EINVAL; + + if (to > parent->last_entry_used) + parent->last_entry_used = to; + + ++level; + saddr = saddr & ((1 << shift) - 1); + eaddr = eaddr & ((1 << shift) - 1); + + /* walk over the address space and allocate the page tables */ + for (pt_idx = from; pt_idx <= to; ++pt_idx) { + struct reservation_object *resv = vm->root.bo->tbo.resv; + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + struct amdgpu_bo *pt; + + if (!entry->bo) { + r = amdgpu_bo_create(adev, + amdgpu_vm_bo_size(adev, level), + AMDGPU_GPU_PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED, + NULL, resv, &pt); + if (r) + return r; + + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. + */ + pt->parent = amdgpu_bo_ref(vm->root.bo); + + entry->bo = pt; + entry->addr = 0; + } + + if (level < adev->vm_manager.num_level) { + uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; + uint64_t sub_eaddr = (pt_idx == to) ? eaddr : + ((1 << shift) - 1); + r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, + sub_eaddr, level); + if (r) + return r; + } + } + + return 0; +} + +/** + * amdgpu_vm_alloc_pts - Allocate page tables. + * + * @adev: amdgpu_device pointer + * @vm: VM to allocate page tables for + * @saddr: Start address which needs to be allocated + * @size: Size from start address we need. + * + * Make sure the page tables are allocated. + */ +int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t saddr, uint64_t size) +{ + uint64_t last_pfn; + uint64_t eaddr; + + /* validate the parameters */ + if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + eaddr = saddr + size - 1; + last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; + if (last_pfn >= adev->vm_manager.max_pfn) { + dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", + last_pfn, adev->vm_manager.max_pfn); + return -EINVAL; + } + + saddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr /= AMDGPU_GPU_PAGE_SIZE; + + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0); } -static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev, - struct amdgpu_vm_id *id) +/** + * amdgpu_vm_had_gpu_reset - check if reset occured since last use + * + * @adev: amdgpu_device pointer + * @id: VMID structure + * + * Check if GPU reset occured since last use of the VMID. + */ +static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, + struct amdgpu_vm_id *id) { return id->current_gpu_reset_count != - atomic_read(&adev->gpu_reset_counter) ? true : false; + atomic_read(&adev->gpu_reset_counter); } /** @@ -271,7 +474,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* Check all the prerequisites to using this VMID */ if (!id) continue; - if (amdgpu_vm_is_gpu_reset(adev, id)) + if (amdgpu_vm_had_gpu_reset(adev, id)) continue; if (atomic64_read(&id->owner) != vm->client_id) @@ -299,7 +502,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); list_move_tail(&id->list, &adev->vm_manager.ids_lru); vm->ids[ring->idx] = id; @@ -320,9 +522,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - dma_fence_put(id->first); - id->first = dma_fence_get(fence); - dma_fence_put(id->last_flush); id->last_flush = NULL; @@ -369,6 +568,16 @@ static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) return false; } +static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +{ + u64 addr = mc_addr; + + if (adev->gart.gart_funcs->adjust_mc_addr) + addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); + + return addr; +} + /** * amdgpu_vm_flush - hardware flush the vm * @@ -389,19 +598,31 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) id->gws_size != job->gws_size || id->oa_base != job->oa_base || id->oa_size != job->oa_size); + bool vm_flush_needed = job->vm_needs_flush || + amdgpu_vm_ring_has_compute_vm_bug(ring); + unsigned patch_offset = 0; int r; - if (ring->funcs->emit_pipeline_sync && ( - job->vm_needs_flush || gds_switch_needed || - amdgpu_vm_ring_has_compute_vm_bug(ring))) + if (amdgpu_vm_had_gpu_reset(adev, id)) { + gds_switch_needed = true; + vm_flush_needed = true; + } + + if (!vm_flush_needed && !gds_switch_needed) + return 0; + + if (ring->funcs->init_cond_exec) + patch_offset = amdgpu_ring_init_cond_exec(ring); + + if (ring->funcs->emit_pipeline_sync) amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && (job->vm_needs_flush || - amdgpu_vm_is_gpu_reset(adev, id))) { + if (ring->funcs->emit_vm_flush && vm_flush_needed) { + u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); struct dma_fence *fence; - trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); + trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -420,12 +641,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) id->gws_size = job->gws_size; id->oa_base = job->oa_base; id->oa_size = job->oa_size; - amdgpu_ring_emit_gds_switch(ring, job->vm_id, - job->gds_base, job->gds_size, - job->gws_base, job->gws_size, - job->oa_base, job->oa_size); + amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base, + job->gds_size, job->gws_base, + job->gws_size, job->oa_base, + job->oa_size); } + if (ring->funcs->patch_cond_exec) + amdgpu_ring_patch_cond_exec(ring, patch_offset); + + /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ + if (ring->funcs->emit_switch_buffer) { + amdgpu_ring_emit_switch_buffer(ring); + amdgpu_ring_emit_switch_buffer(ring); + } return 0; } @@ -490,7 +719,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, - uint32_t flags) + uint64_t flags) { trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); @@ -519,7 +748,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, - uint32_t flags) + uint64_t flags) { uint64_t src = (params->src + (addr >> 12) * 8); @@ -554,24 +783,24 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) } /* - * amdgpu_vm_update_pdes - make sure that page directory is valid + * amdgpu_vm_update_level - update a single level in the hierarchy * * @adev: amdgpu_device pointer * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range + * @parent: parent directory * - * Allocates new page tables if necessary - * and updates the page directory. + * Makes sure all entries in @parent are up to date. * Returns 0 for success, error for failure. */ -int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +static int amdgpu_vm_update_level(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt *parent, + unsigned level) { struct amdgpu_bo *shadow; struct amdgpu_ring *ring; uint64_t pd_addr, shadow_addr; - uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; + uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; unsigned count = 0, pt_idx, ndw; struct amdgpu_job *job; @@ -580,16 +809,19 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, int r; + if (!parent->entries) + return 0; ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - shadow = vm->page_directory->shadow; /* padding, etc. */ ndw = 64; /* assume the worst case */ - ndw += vm->max_pde_used * 6; + ndw += parent->last_entry_used * 6; - pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + + shadow = parent->bo->shadow; if (shadow) { r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); if (r) @@ -608,9 +840,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, params.adev = adev; params.ib = &job->ibs[0]; - /* walk over the address space and update the page directory */ - for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { - struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo; + /* walk over the address space and update the directory */ + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + struct amdgpu_bo *bo = parent->entries[pt_idx].bo; uint64_t pde, pt; if (bo == NULL) @@ -626,10 +858,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } pt = amdgpu_bo_gpu_offset(bo); - if (vm->page_tables[pt_idx].addr == pt) + if (parent->entries[pt_idx].addr == pt) continue; - vm->page_tables[pt_idx].addr = pt; + parent->entries[pt_idx].addr = pt; pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || @@ -637,15 +869,18 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { + uint64_t pt_addr = + amdgpu_vm_adjust_mc_addr(adev, last_pt); + if (shadow) amdgpu_vm_do_set_ptes(¶ms, last_shadow, - last_pt, count, + pt_addr, count, incr, AMDGPU_PTE_VALID); amdgpu_vm_do_set_ptes(¶ms, last_pde, - last_pt, count, incr, + pt_addr, count, incr, AMDGPU_PTE_VALID); } @@ -659,36 +894,51 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) { - if (vm->page_directory->shadow) - amdgpu_vm_do_set_ptes(¶ms, last_shadow, last_pt, + uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); + + if (vm->root.bo->shadow) + amdgpu_vm_do_set_ptes(¶ms, last_shadow, pt_addr, count, incr, AMDGPU_PTE_VALID); - amdgpu_vm_do_set_ptes(¶ms, last_pde, last_pt, + amdgpu_vm_do_set_ptes(¶ms, last_pde, pt_addr, count, incr, AMDGPU_PTE_VALID); } if (params.ib->length_dw == 0) { amdgpu_job_free(job); - return 0; - } - - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, - AMDGPU_FENCE_OWNER_VM); - if (shadow) - amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, + } else { + amdgpu_ring_pad_ib(ring, params.ib); + amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + if (shadow) + amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, + AMDGPU_FENCE_OWNER_VM); - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &fence); - if (r) - goto error_free; + WARN_ON(params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); + if (r) + goto error_free; - amdgpu_bo_fence(vm->page_directory, fence, true); - dma_fence_put(vm->page_directory_fence); - vm->page_directory_fence = dma_fence_get(fence); - dma_fence_put(fence); + amdgpu_bo_fence(parent->bo, fence, true); + dma_fence_put(vm->last_dir_update); + vm->last_dir_update = dma_fence_get(fence); + dma_fence_put(fence); + } + /* + * Recurse into the subdirectories. This recursion is harmless because + * we only have a maximum of 5 layers. + */ + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + + if (!entry->bo) + continue; + + r = amdgpu_vm_update_level(adev, vm, entry, level + 1); + if (r) + return r; + } return 0; @@ -697,6 +947,47 @@ error_free: return r; } +/* + * amdgpu_vm_update_directories - make sure that all directories are valid + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * + * Makes sure all directories are up to date. + * Returns 0 for success, error for failure. + */ +int amdgpu_vm_update_directories(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + return amdgpu_vm_update_level(adev, vm, &vm->root, 0); +} + +/** + * amdgpu_vm_find_pt - find the page table for an address + * + * @p: see amdgpu_pte_update_params definition + * @addr: virtual address in question + * + * Find the page table BO for a virtual address, return NULL when none found. + */ +static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, + uint64_t addr) +{ + struct amdgpu_vm_pt *entry = &p->vm->root; + unsigned idx, level = p->adev->vm_manager.num_level; + + while (entry->entries) { + idx = addr >> (p->adev->vm_manager.block_size * level--); + idx %= amdgpu_bo_size(entry->bo) / 8; + entry = &entry->entries[idx]; + } + + if (level) + return NULL; + + return entry->bo; +} + /** * amdgpu_vm_update_ptes - make sure that page tables are valid * @@ -710,23 +1001,26 @@ error_free: * Update the page tables in the range @start - @end. */ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_vm *vm, uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) + uint64_t dst, uint64_t flags) { - const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; + struct amdgpu_device *adev = params->adev; + const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; uint64_t cur_pe_start, cur_nptes, cur_dst; uint64_t addr; /* next GPU address to be updated */ - uint64_t pt_idx; struct amdgpu_bo *pt; unsigned nptes; /* next number of ptes to be updated */ uint64_t next_pe_start; /* initialize the variables */ addr = start; - pt_idx = addr >> amdgpu_vm_block_size; - pt = vm->page_tables[pt_idx].bo; + pt = amdgpu_vm_get_pt(params, addr); + if (!pt) { + pr_err("PT not found, aborting update_ptes\n"); + return; + } + if (params->shadow) { if (!pt->shadow) return; @@ -735,7 +1029,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); cur_pe_start = amdgpu_bo_gpu_offset(pt); cur_pe_start += (addr & mask) * 8; @@ -748,8 +1042,12 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, /* walk over the address space and update the page tables */ while (addr < end) { - pt_idx = addr >> amdgpu_vm_block_size; - pt = vm->page_tables[pt_idx].bo; + pt = amdgpu_vm_get_pt(params, addr); + if (!pt) { + pr_err("PT not found, aborting update_ptes\n"); + return; + } + if (params->shadow) { if (!pt->shadow) return; @@ -759,7 +1057,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); next_pe_start = amdgpu_bo_gpu_offset(pt); next_pe_start += (addr & mask) * 8; @@ -800,9 +1098,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @flags: hw mapping flags */ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, - struct amdgpu_vm *vm, uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) + uint64_t dst, uint64_t flags) { /** * The MC L1 TLB supports variable sized pages, based on a fragment @@ -834,25 +1131,25 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, if (params->src || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { - amdgpu_vm_update_ptes(params, vm, start, end, dst, flags); + amdgpu_vm_update_ptes(params, start, end, dst, flags); return; } /* handle the 4K area at the beginning */ if (start != frag_start) { - amdgpu_vm_update_ptes(params, vm, start, frag_start, + amdgpu_vm_update_ptes(params, start, frag_start, dst, flags); dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; } /* handle the area in the middle */ - amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst, + amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, flags | frag_flags); /* handle the 4K area at the end */ if (frag_end != end) { dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; - amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags); + amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); } } @@ -879,7 +1176,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, dma_addr_t *pages_addr, struct amdgpu_vm *vm, uint64_t start, uint64_t last, - uint32_t flags, uint64_t addr, + uint64_t flags, uint64_t addr, struct dma_fence **fence) { struct amdgpu_ring *ring; @@ -892,14 +1189,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; + params.vm = vm; params.src = src; ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - params.src = src; - /* sync to everything on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_UNDEFINED; @@ -910,7 +1204,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, * reserve space for one command every (1 << BLOCK_SIZE) * entries or 2k dwords (whatever is smaller) */ - ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1; + ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1; /* padding, etc. */ ndw = 64; @@ -967,19 +1261,19 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, + r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv, owner); if (r) goto error_free; - r = reservation_object_reserve_shared(vm->page_directory->tbo.resv); + r = reservation_object_reserve_shared(vm->root.bo->tbo.resv); if (r) goto error_free; params.shadow = true; - amdgpu_vm_frag_ptes(¶ms, vm, start, last + 1, addr, flags); + amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); params.shadow = false; - amdgpu_vm_frag_ptes(¶ms, vm, start, last + 1, addr, flags); + amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); @@ -988,12 +1282,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_bo_fence(vm->page_directory, f, true); - if (fence) { - dma_fence_put(*fence); - *fence = dma_fence_get(f); - } - dma_fence_put(f); + amdgpu_bo_fence(vm->root.bo, f, true); + dma_fence_put(*fence); + *fence = f; return 0; error_free: @@ -1020,15 +1311,15 @@ error_free: */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, - uint32_t gtt_flags, + uint64_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, - uint32_t flags, + uint64_t flags, struct drm_mm_node *nodes, struct dma_fence **fence) { - uint64_t pfn, src = 0, start = mapping->it.start; + uint64_t pfn, src = 0, start = mapping->start; int r; /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here @@ -1039,6 +1330,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) flags &= ~AMDGPU_PTE_WRITEABLE; + flags &= ~AMDGPU_PTE_EXECUTABLE; + flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; + + flags &= ~AMDGPU_PTE_MTYPE_MASK; + flags |= (mapping->flags & AMDGPU_PTE_MTYPE_MASK); + trace_amdgpu_vm_bo_update(mapping); pfn = mapping->offset >> PAGE_SHIFT; @@ -1074,7 +1371,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } addr += pfn << PAGE_SHIFT; - last = min((uint64_t)mapping->it.last, start + max_entries - 1); + last = min((uint64_t)mapping->last, start + max_entries - 1); r = amdgpu_vm_bo_update_mapping(adev, exclusive, src, pages_addr, vm, start, last, flags, addr, @@ -1089,7 +1386,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } start = last + 1; - } while (unlikely(start != mapping->it.last + 1)); + } while (unlikely(start != mapping->last + 1)); return 0; } @@ -1111,13 +1408,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_vm *vm = bo_va->vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; - uint32_t gtt_flags, flags; + uint64_t gtt_flags, flags; struct ttm_mem_reg *mem; struct drm_mm_node *nodes; struct dma_fence *exclusive; int r; - if (clear) { + if (clear || !bo_va->bo) { mem = NULL; nodes = NULL; exclusive = NULL; @@ -1134,9 +1431,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); } - flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && - adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? flags : 0; + if (bo_va->bo) { + flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); + gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && + adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? + flags : 0; + } else { + flags = 0x0; + gtt_flags = ~0x0; + } spin_lock(&vm->status_lock); if (!list_empty(&bo_va->vm_status)) @@ -1171,10 +1474,142 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } /** + * amdgpu_vm_update_prt_state - update the global PRT state + */ +static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) +{ + unsigned long flags; + bool enable; + + spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); + enable = !!atomic_read(&adev->vm_manager.num_prt_users); + adev->gart.gart_funcs->set_prt(adev, enable); + spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); +} + +/** + * amdgpu_vm_prt_get - add a PRT user + */ +static void amdgpu_vm_prt_get(struct amdgpu_device *adev) +{ + if (!adev->gart.gart_funcs->set_prt) + return; + + if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) + amdgpu_vm_update_prt_state(adev); +} + +/** + * amdgpu_vm_prt_put - drop a PRT user + */ +static void amdgpu_vm_prt_put(struct amdgpu_device *adev) +{ + if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0) + amdgpu_vm_update_prt_state(adev); +} + +/** + * amdgpu_vm_prt_cb - callback for updating the PRT status + */ +static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) +{ + struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb); + + amdgpu_vm_prt_put(cb->adev); + kfree(cb); +} + +/** + * amdgpu_vm_add_prt_cb - add callback for updating the PRT status + */ +static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, + struct dma_fence *fence) +{ + struct amdgpu_prt_cb *cb; + + if (!adev->gart.gart_funcs->set_prt) + return; + + cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); + if (!cb) { + /* Last resort when we are OOM */ + if (fence) + dma_fence_wait(fence, false); + + amdgpu_vm_prt_put(adev); + } else { + cb->adev = adev; + if (!fence || dma_fence_add_callback(fence, &cb->cb, + amdgpu_vm_prt_cb)) + amdgpu_vm_prt_cb(fence, &cb->cb); + } +} + +/** + * amdgpu_vm_free_mapping - free a mapping + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @mapping: mapping to be freed + * @fence: fence of the unmap operation + * + * Free a mapping and make sure we decrease the PRT usage count if applicable. + */ +static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo_va_mapping *mapping, + struct dma_fence *fence) +{ + if (mapping->flags & AMDGPU_PTE_PRT) + amdgpu_vm_add_prt_cb(adev, fence); + kfree(mapping); +} + +/** + * amdgpu_vm_prt_fini - finish all prt mappings + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * + * Register a cleanup callback to disable PRT support after VM dies. + */ +static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + struct reservation_object *resv = vm->root.bo->tbo.resv; + struct dma_fence *excl, **shared; + unsigned i, shared_count; + int r; + + r = reservation_object_get_fences_rcu(resv, &excl, + &shared_count, &shared); + if (r) { + /* Not enough memory to grab the fence list, as last resort + * block for all the fences to complete. + */ + reservation_object_wait_timeout_rcu(resv, true, false, + MAX_SCHEDULE_TIMEOUT); + return; + } + + /* Add a callback for each fence in the reservation object */ + amdgpu_vm_prt_get(adev); + amdgpu_vm_add_prt_cb(adev, excl); + + for (i = 0; i < shared_count; ++i) { + amdgpu_vm_prt_get(adev); + amdgpu_vm_add_prt_cb(adev, shared[i]); + } + + kfree(shared); +} + +/** * amdgpu_vm_clear_freed - clear freed BOs in the PT * * @adev: amdgpu_device pointer * @vm: requested vm + * @fence: optional resulting fence (unchanged if no work needed to be done + * or if an error occurred) * * Make sure all freed BOs are cleared in the PT. * Returns 0 for success. @@ -1182,9 +1617,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, * PTs have to be reserved and mutex must be locked! */ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct dma_fence **fence) { struct amdgpu_bo_va_mapping *mapping; + struct dma_fence *f = NULL; int r; while (!list_empty(&vm->freed)) { @@ -1193,12 +1630,21 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, list_del(&mapping->list); r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping, - 0, 0, NULL); - kfree(mapping); - if (r) + 0, 0, &f); + amdgpu_vm_free_mapping(adev, vm, mapping, f); + if (r) { + dma_fence_put(f); return r; + } + } + if (fence && f) { + dma_fence_put(*fence); + *fence = f; + } else { + dma_fence_put(f); } + return 0; } @@ -1271,7 +1717,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->invalids); INIT_LIST_HEAD(&bo_va->vm_status); - list_add_tail(&bo_va->bo_list, &bo->va); + if (bo) + list_add_tail(&bo_va->bo_list, &bo->va); return bo_va; } @@ -1295,12 +1742,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t saddr, uint64_t offset, uint64_t size, uint64_t flags) { - struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo_va_mapping *mapping, *tmp; struct amdgpu_vm *vm = bo_va->vm; - struct interval_tree_node *it; - unsigned last_pfn, pt_idx; uint64_t eaddr; - int r; /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || @@ -1309,93 +1753,103 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; - if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) - return -EINVAL; - - last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; - if (last_pfn >= adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n", - last_pfn, adev->vm_manager.max_pfn); + if (saddr >= eaddr || + (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) return -EINVAL; - } saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; - it = interval_tree_iter_first(&vm->va, saddr, eaddr); - if (it) { - struct amdgpu_bo_va_mapping *tmp; - tmp = container_of(it, struct amdgpu_bo_va_mapping, it); + tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); + if (tmp) { /* bo and tmp overlap, invalid addr */ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " - "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr, - tmp->it.start, tmp->it.last + 1); - r = -EINVAL; - goto error; + "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, + tmp->start, tmp->last + 1); + return -EINVAL; } mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); - if (!mapping) { - r = -ENOMEM; - goto error; - } + if (!mapping) + return -ENOMEM; INIT_LIST_HEAD(&mapping->list); - mapping->it.start = saddr; - mapping->it.last = eaddr; + mapping->start = saddr; + mapping->last = eaddr; mapping->offset = offset; mapping->flags = flags; list_add(&mapping->list, &bo_va->invalids); - interval_tree_insert(&mapping->it, &vm->va); - - /* Make sure the page tables are allocated */ - saddr >>= amdgpu_vm_block_size; - eaddr >>= amdgpu_vm_block_size; + amdgpu_vm_it_insert(mapping, &vm->va); - BUG_ON(eaddr >= amdgpu_vm_num_pdes(adev)); + if (flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); - if (eaddr > vm->max_pde_used) - vm->max_pde_used = eaddr; + return 0; +} - /* walk over the address space and allocate the page tables */ - for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { - struct reservation_object *resv = vm->page_directory->tbo.resv; - struct amdgpu_bo *pt; +/** + * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings + * + * @adev: amdgpu_device pointer + * @bo_va: bo_va to store the address + * @saddr: where to map the BO + * @offset: requested offset in the BO + * @flags: attributes of pages (read/write/valid/etc.) + * + * Add a mapping of the BO at the specefied addr into the VM. Replace existing + * mappings as we do so. + * Returns 0 for success, error for failure. + * + * Object has to be reserved and unreserved outside! + */ +int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + uint64_t saddr, uint64_t offset, + uint64_t size, uint64_t flags) +{ + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_vm *vm = bo_va->vm; + uint64_t eaddr; + int r; - if (vm->page_tables[pt_idx].bo) - continue; + /* validate the parameters */ + if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK || + size == 0 || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; - r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, - AMDGPU_GPU_PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, - NULL, resv, &pt); - if (r) - goto error_free; + /* make sure object fit at this offset */ + eaddr = saddr + size - 1; + if (saddr >= eaddr || + (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + return -EINVAL; - /* Keep a reference to the page table to avoid freeing - * them up in the wrong order. - */ - pt->parent = amdgpu_bo_ref(vm->page_directory); + /* Allocate all the needed memory */ + mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); + if (!mapping) + return -ENOMEM; - vm->page_tables[pt_idx].bo = pt; - vm->page_tables[pt_idx].addr = 0; + r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); + if (r) { + kfree(mapping); + return r; } - return 0; + saddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr /= AMDGPU_GPU_PAGE_SIZE; -error_free: - list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); - trace_amdgpu_vm_bo_unmap(bo_va, mapping); - kfree(mapping); + mapping->start = saddr; + mapping->last = eaddr; + mapping->offset = offset; + mapping->flags = flags; -error: - return r; + list_add(&mapping->list, &bo_va->invalids); + amdgpu_vm_it_insert(mapping, &vm->va); + + if (flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); + + return 0; } /** @@ -1421,7 +1875,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, saddr /= AMDGPU_GPU_PAGE_SIZE; list_for_each_entry(mapping, &bo_va->valids, list) { - if (mapping->it.start == saddr) + if (mapping->start == saddr) break; } @@ -1429,7 +1883,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, valid = false; list_for_each_entry(mapping, &bo_va->invalids, list) { - if (mapping->it.start == saddr) + if (mapping->start == saddr) break; } @@ -1438,13 +1892,113 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, } list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); + amdgpu_vm_it_remove(mapping, &vm->va); trace_amdgpu_vm_bo_unmap(bo_va, mapping); if (valid) list_add(&mapping->list, &vm->freed); else - kfree(mapping); + amdgpu_vm_free_mapping(adev, vm, mapping, + bo_va->last_pt_update); + + return 0; +} + +/** + * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range + * + * @adev: amdgpu_device pointer + * @vm: VM structure to use + * @saddr: start of the range + * @size: size of the range + * + * Remove all mappings in a range, split them as appropriate. + * Returns 0 for success, error for failure. + */ +int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t saddr, uint64_t size) +{ + struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; + LIST_HEAD(removed); + uint64_t eaddr; + + eaddr = saddr + size - 1; + saddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr /= AMDGPU_GPU_PAGE_SIZE; + + /* Allocate all the needed memory */ + before = kzalloc(sizeof(*before), GFP_KERNEL); + if (!before) + return -ENOMEM; + INIT_LIST_HEAD(&before->list); + + after = kzalloc(sizeof(*after), GFP_KERNEL); + if (!after) { + kfree(before); + return -ENOMEM; + } + INIT_LIST_HEAD(&after->list); + + /* Now gather all removed mappings */ + tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); + while (tmp) { + /* Remember mapping split at the start */ + if (tmp->start < saddr) { + before->start = tmp->start; + before->last = saddr - 1; + before->offset = tmp->offset; + before->flags = tmp->flags; + list_add(&before->list, &tmp->list); + } + + /* Remember mapping split at the end */ + if (tmp->last > eaddr) { + after->start = eaddr + 1; + after->last = tmp->last; + after->offset = tmp->offset; + after->offset += after->start - tmp->start; + after->flags = tmp->flags; + list_add(&after->list, &tmp->list); + } + + list_del(&tmp->list); + list_add(&tmp->list, &removed); + + tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr); + } + + /* And free them up */ + list_for_each_entry_safe(tmp, next, &removed, list) { + amdgpu_vm_it_remove(tmp, &vm->va); + list_del(&tmp->list); + + if (tmp->start < saddr) + tmp->start = saddr; + if (tmp->last > eaddr) + tmp->last = eaddr; + + list_add(&tmp->list, &vm->freed); + trace_amdgpu_vm_bo_unmap(NULL, tmp); + } + + /* Insert partial mapping before the range */ + if (!list_empty(&before->list)) { + amdgpu_vm_it_insert(before, &vm->va); + if (before->flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); + } else { + kfree(before); + } + + /* Insert partial mapping after the range */ + if (!list_empty(&after->list)) { + amdgpu_vm_it_insert(after, &vm->va); + if (after->flags & AMDGPU_PTE_PRT) + amdgpu_vm_prt_get(adev); + } else { + kfree(after); + } return 0; } @@ -1473,14 +2027,15 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); + amdgpu_vm_it_remove(mapping, &vm->va); trace_amdgpu_vm_bo_unmap(bo_va, mapping); list_add(&mapping->list, &vm->freed); } list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); - kfree(mapping); + amdgpu_vm_it_remove(mapping, &vm->va); + amdgpu_vm_free_mapping(adev, vm, mapping, + bo_va->last_pt_update); } dma_fence_put(bo_va->last_pt_update); @@ -1509,6 +2064,44 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, } } +static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) +{ + /* Total bits covered by PD + PTs */ + unsigned bits = ilog2(vm_size) + 18; + + /* Make sure the PD is 4K in size up to 8GB address space. + Above that split equal between PD and PTs */ + if (vm_size <= 8) + return (bits - 9); + else + return ((bits + 3) / 2); +} + +/** + * amdgpu_vm_adjust_size - adjust vm size and block size + * + * @adev: amdgpu_device pointer + * @vm_size: the default vm size if it's set auto + */ +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) +{ + /* adjust vm size firstly */ + if (amdgpu_vm_size == -1) + adev->vm_manager.vm_size = vm_size; + else + adev->vm_manager.vm_size = amdgpu_vm_size; + + /* block size depends on vm size */ + if (amdgpu_vm_block_size == -1) + adev->vm_manager.block_size = + amdgpu_vm_get_block_size(adev->vm_manager.vm_size); + else + adev->vm_manager.block_size = amdgpu_vm_block_size; + + DRM_INFO("vm size is %llu GB, block size is %u-bit\n", + adev->vm_manager.vm_size, adev->vm_manager.block_size); +} + /** * amdgpu_vm_init - initialize a vm instance * @@ -1520,8 +2113,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, - AMDGPU_VM_PTE_COUNT * 8); - unsigned pd_size, pd_entries; + AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; struct amdgpu_ring *ring; struct amd_sched_rq *rq; @@ -1536,16 +2128,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); - pd_size = amdgpu_vm_directory_size(adev); - pd_entries = amdgpu_vm_num_pdes(adev); - - /* allocate page table array */ - vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt)); - if (vm->page_tables == NULL) { - DRM_ERROR("Cannot allocate memory for page table array\n"); - return -ENOMEM; - } - /* create scheduler entity for page table updates */ ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); @@ -1555,44 +2137,64 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) r = amd_sched_entity_init(&ring->sched, &vm->entity, rq, amdgpu_sched_jobs); if (r) - goto err; + return r; - vm->page_directory_fence = NULL; + vm->last_dir_update = NULL; - r = amdgpu_bo_create(adev, pd_size, align, true, + r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED, - NULL, NULL, &vm->page_directory); + NULL, NULL, &vm->root.bo); if (r) goto error_free_sched_entity; - r = amdgpu_bo_reserve(vm->page_directory, false); + r = amdgpu_bo_reserve(vm->root.bo, false); if (r) - goto error_free_page_directory; + goto error_free_root; vm->last_eviction_counter = atomic64_read(&adev->num_evictions); - amdgpu_bo_unreserve(vm->page_directory); + amdgpu_bo_unreserve(vm->root.bo); return 0; -error_free_page_directory: - amdgpu_bo_unref(&vm->page_directory->shadow); - amdgpu_bo_unref(&vm->page_directory); - vm->page_directory = NULL; +error_free_root: + amdgpu_bo_unref(&vm->root.bo->shadow); + amdgpu_bo_unref(&vm->root.bo); + vm->root.bo = NULL; error_free_sched_entity: amd_sched_entity_fini(&ring->sched, &vm->entity); -err: - drm_free_large(vm->page_tables); - return r; } /** + * amdgpu_vm_free_levels - free PD/PT levels + * + * @level: PD/PT starting level to free + * + * Free the page directory or page table level and all sub levels. + */ +static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) +{ + unsigned i; + + if (level->bo) { + amdgpu_bo_unref(&level->bo->shadow); + amdgpu_bo_unref(&level->bo); + } + + if (level->entries) + for (i = 0; i <= level->last_entry_used; i++) + amdgpu_vm_free_levels(&level->entries[i]); + + drm_free_large(level->entries); +} + +/** * amdgpu_vm_fini - tear down a vm instance * * @adev: amdgpu_device pointer @@ -1604,37 +2206,30 @@ err: void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; - int i; + bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; amd_sched_entity_fini(vm->entity.sched, &vm->entity); if (!RB_EMPTY_ROOT(&vm->va)) { dev_err(adev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) { + rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { list_del(&mapping->list); - interval_tree_remove(&mapping->it, &vm->va); + amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); } list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - list_del(&mapping->list); - kfree(mapping); - } - - for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) { - struct amdgpu_bo *pt = vm->page_tables[i].bo; - - if (!pt) - continue; + if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { + amdgpu_vm_prt_fini(adev, vm); + prt_fini_needed = false; + } - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt); + list_del(&mapping->list); + amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - drm_free_large(vm->page_tables); - amdgpu_bo_unref(&vm->page_directory->shadow); - amdgpu_bo_unref(&vm->page_directory); - dma_fence_put(vm->page_directory_fence); + amdgpu_vm_free_levels(&vm->root); + dma_fence_put(vm->last_dir_update); } /** @@ -1665,6 +2260,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); atomic64_set(&adev->vm_manager.client_counter, 0); + spin_lock_init(&adev->vm_manager.prt_lock); + atomic_set(&adev->vm_manager.num_prt_users, 0); } /** @@ -1681,7 +2278,6 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_NUM_VM; ++i) { struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; - dma_fence_put(adev->vm_manager.ids[i].first); amdgpu_sync_free(&adev->vm_manager.ids[i].active); dma_fence_put(id->flushed_updates); dma_fence_put(id->last_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 18c72c0b478d..d9e57290dc71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -45,7 +45,7 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF /* number of entries in page table */ -#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size) +#define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size) /* PTBs (Page Table Blocks) need to be aligned to 32K */ #define AMDGPU_VM_PTB_ALIGN_SIZE 32768 @@ -53,26 +53,44 @@ struct amdgpu_bo_list_entry; /* LOG2 number of continuous pages for the fragment field */ #define AMDGPU_LOG2_PAGES_PER_FRAG 4 -#define AMDGPU_PTE_VALID (1 << 0) -#define AMDGPU_PTE_SYSTEM (1 << 1) -#define AMDGPU_PTE_SNOOPED (1 << 2) +#define AMDGPU_PTE_VALID (1ULL << 0) +#define AMDGPU_PTE_SYSTEM (1ULL << 1) +#define AMDGPU_PTE_SNOOPED (1ULL << 2) /* VI only */ -#define AMDGPU_PTE_EXECUTABLE (1 << 4) +#define AMDGPU_PTE_EXECUTABLE (1ULL << 4) -#define AMDGPU_PTE_READABLE (1 << 5) -#define AMDGPU_PTE_WRITEABLE (1 << 6) +#define AMDGPU_PTE_READABLE (1ULL << 5) +#define AMDGPU_PTE_WRITEABLE (1ULL << 6) -#define AMDGPU_PTE_FRAG(x) ((x & 0x1f) << 7) +#define AMDGPU_PTE_FRAG(x) ((x & 0x1fULL) << 7) + +#define AMDGPU_PTE_PRT (1ULL << 63) + +/* VEGA10 only */ +#define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) +#define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) /* How to programm VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 +/* max number of VMHUB */ +#define AMDGPU_MAX_VMHUBS 2 +#define AMDGPU_GFXHUB 0 +#define AMDGPU_MMHUB 1 + +/* hardcode that limit for now */ +#define AMDGPU_VA_RESERVED_SIZE (8 << 20) + struct amdgpu_vm_pt { struct amdgpu_bo *bo; uint64_t addr; + + /* array of page tables, one for each directory entry */ + struct amdgpu_vm_pt *entries; + unsigned last_entry_used; }; struct amdgpu_vm { @@ -92,14 +110,10 @@ struct amdgpu_vm { struct list_head freed; /* contains the page directory */ - struct amdgpu_bo *page_directory; - unsigned max_pde_used; - struct dma_fence *page_directory_fence; + struct amdgpu_vm_pt root; + struct dma_fence *last_dir_update; uint64_t last_eviction_counter; - /* array of page tables, one for each page directory entry */ - struct amdgpu_vm_pt *page_tables; - /* for id and flush management per ring */ struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS]; @@ -117,7 +131,6 @@ struct amdgpu_vm { struct amdgpu_vm_id { struct list_head list; - struct dma_fence *first; struct amdgpu_sync active; struct dma_fence *last_flush; atomic64_t owner; @@ -147,7 +160,10 @@ struct amdgpu_vm_manager { u64 fence_context; unsigned seqno[AMDGPU_MAX_RINGS]; - uint32_t max_pfn; + uint64_t max_pfn; + uint32_t num_level; + uint64_t vm_size; + uint32_t block_size; /* vram base address for page table entry */ u64 vram_base_offset; /* is vm enabled? */ @@ -159,6 +175,10 @@ struct amdgpu_vm_manager { atomic_t vm_pte_next_ring; /* client id counter */ atomic64_t client_counter; + + /* partial resident texture handling */ + spinlock_t prt_lock; + atomic_t num_prt_users; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); @@ -173,15 +193,19 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param); void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t saddr, uint64_t size); int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct dma_fence *fence, struct amdgpu_job *job); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); -int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, - struct amdgpu_vm *vm); +int amdgpu_vm_update_directories(struct amdgpu_device *adev, + struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm, + struct dma_fence **fence); int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_sync *sync); int amdgpu_vm_bo_update(struct amdgpu_device *adev, @@ -198,10 +222,18 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr, uint64_t offset, uint64_t size, uint64_t flags); +int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, + struct amdgpu_bo_va *bo_va, + uint64_t addr, uint64_t offset, + uint64_t size, uint64_t flags); int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uint64_t addr); +int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t saddr, uint64_t size); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 9e577e3d3147..a4831fe0223b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, const struct ttm_place *place, struct ttm_mem_reg *mem) { - struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo); struct amdgpu_vram_mgr *mgr = man->priv; struct drm_mm *mm = &mgr->mm; struct drm_mm_node *nodes; @@ -106,8 +105,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (!lpfn) lpfn = man->size; - if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS || - place->lpfn || amdgpu_vram_page_split == -1) { + if (place->flags & TTM_PL_FLAG_CONTIGUOUS || + amdgpu_vram_page_split == -1) { pages_per_node = ~0ul; num_nodes = 1; } else { @@ -124,12 +123,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) mode = DRM_MM_INSERT_HIGH; + mem->start = 0; pages_left = mem->num_pages; spin_lock(&mgr->lock); for (i = 0; i < num_nodes; ++i) { unsigned long pages = min(pages_left, pages_per_node); uint32_t alignment = mem->page_alignment; + unsigned long start; if (pages == pages_per_node) alignment = pages_per_node; @@ -141,11 +142,19 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, if (unlikely(r)) goto error; + /* Calculate a virtual BO start address to easily check if + * everything is CPU accessible. + */ + start = nodes[i].start + nodes[i].size; + if (start > mem->num_pages) + start -= mem->num_pages; + else + start = 0; + mem->start = max(mem->start, start); pages_left -= pages; } spin_unlock(&mgr->lock); - mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET; mem->mm_node = nodes; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 1b50e6c13fb3..d69aa2e179bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -166,7 +166,7 @@ static uint32_t atom_iio_execute(struct atom_context *ctx, int base, case ATOM_IIO_END: return temp; default: - printk(KERN_INFO "Unknown IIO opcode.\n"); + pr_info("Unknown IIO opcode\n"); return 0; } } @@ -190,22 +190,19 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, val = gctx->card->reg_read(gctx->card, idx); break; case ATOM_IO_PCI: - printk(KERN_INFO - "PCI registers are not implemented.\n"); + pr_info("PCI registers are not implemented\n"); return 0; case ATOM_IO_SYSIO: - printk(KERN_INFO - "SYSIO registers are not implemented.\n"); + pr_info("SYSIO registers are not implemented\n"); return 0; default: if (!(gctx->io_mode & 0x80)) { - printk(KERN_INFO "Bad IO mode.\n"); + pr_info("Bad IO mode\n"); return 0; } if (!gctx->iio[gctx->io_mode & 0x7F]) { - printk(KERN_INFO - "Undefined indirect IO read method %d.\n", - gctx->io_mode & 0x7F); + pr_info("Undefined indirect IO read method %d\n", + gctx->io_mode & 0x7F); return 0; } val = @@ -469,22 +466,19 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, gctx->card->reg_write(gctx->card, idx, val); break; case ATOM_IO_PCI: - printk(KERN_INFO - "PCI registers are not implemented.\n"); + pr_info("PCI registers are not implemented\n"); return; case ATOM_IO_SYSIO: - printk(KERN_INFO - "SYSIO registers are not implemented.\n"); + pr_info("SYSIO registers are not implemented\n"); return; default: if (!(gctx->io_mode & 0x80)) { - printk(KERN_INFO "Bad IO mode.\n"); + pr_info("Bad IO mode\n"); return; } if (!gctx->iio[gctx->io_mode & 0xFF]) { - printk(KERN_INFO - "Undefined indirect IO write method %d.\n", - gctx->io_mode & 0x7F); + pr_info("Undefined indirect IO write method %d\n", + gctx->io_mode & 0x7F); return; } atom_iio_execute(gctx, gctx->iio[gctx->io_mode & 0xFF], @@ -850,17 +844,17 @@ static void atom_op_postcard(atom_exec_context *ctx, int *ptr, int arg) static void atom_op_repeat(atom_exec_context *ctx, int *ptr, int arg) { - printk(KERN_INFO "unimplemented!\n"); + pr_info("unimplemented!\n"); } static void atom_op_restorereg(atom_exec_context *ctx, int *ptr, int arg) { - printk(KERN_INFO "unimplemented!\n"); + pr_info("unimplemented!\n"); } static void atom_op_savereg(atom_exec_context *ctx, int *ptr, int arg) { - printk(KERN_INFO "unimplemented!\n"); + pr_info("unimplemented!\n"); } static void atom_op_setdatablock(atom_exec_context *ctx, int *ptr, int arg) @@ -1023,7 +1017,7 @@ static void atom_op_switch(atom_exec_context *ctx, int *ptr, int arg) } (*ptr) += 2; } else { - printk(KERN_INFO "Bad case.\n"); + pr_info("Bad case\n"); return; } (*ptr) += 2; @@ -1306,8 +1300,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) struct atom_context *ctx = kzalloc(sizeof(struct atom_context), GFP_KERNEL); char *str; - char name[512]; - int i; + u16 idx; if (!ctx) return NULL; @@ -1316,14 +1309,14 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) ctx->bios = bios; if (CU16(0) != ATOM_BIOS_MAGIC) { - printk(KERN_INFO "Invalid BIOS magic.\n"); + pr_info("Invalid BIOS magic\n"); kfree(ctx); return NULL; } if (strncmp (CSTR(ATOM_ATI_MAGIC_PTR), ATOM_ATI_MAGIC, strlen(ATOM_ATI_MAGIC))) { - printk(KERN_INFO "Invalid ATI magic.\n"); + pr_info("Invalid ATI magic\n"); kfree(ctx); return NULL; } @@ -1332,7 +1325,7 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) if (strncmp (CSTR(base + ATOM_ROM_MAGIC_PTR), ATOM_ROM_MAGIC, strlen(ATOM_ROM_MAGIC))) { - printk(KERN_INFO "Invalid ATOM magic.\n"); + pr_info("Invalid ATOM magic\n"); kfree(ctx); return NULL; } @@ -1345,18 +1338,13 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) return NULL; } - str = CSTR(CU16(base + ATOM_ROM_MSG_PTR)); - while (*str && ((*str == '\n') || (*str == '\r'))) - str++; - /* name string isn't always 0 terminated */ - for (i = 0; i < 511; i++) { - name[i] = str[i]; - if (name[i] < '.' || name[i] > 'z') { - name[i] = 0; - break; - } - } - printk(KERN_INFO "ATOM BIOS: %s\n", name); + idx = CU16(ATOM_ROM_PART_NUMBER_PTR); + if (idx == 0) + idx = 0x80; + + str = CSTR(idx); + if (*str != '\0') + pr_info("ATOM BIOS: %s\n", str); return ctx; } @@ -1429,29 +1417,3 @@ bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t * return true; } -int amdgpu_atom_allocate_fb_scratch(struct atom_context *ctx) -{ - int index = GetIndexIntoMasterTable(DATA, VRAM_UsageByFirmware); - uint16_t data_offset; - int usage_bytes = 0; - struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage; - - if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { - firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset); - - DRM_DEBUG("atom firmware requested %08x %dkb\n", - le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware), - le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb)); - - usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; - } - ctx->scratch_size_bytes = 0; - if (usage_bytes == 0) - usage_bytes = 20 * 1024; - /* allocate some scratch memory */ - ctx->scratch = kzalloc(usage_bytes, GFP_KERNEL); - if (!ctx->scratch) - return -ENOMEM; - ctx->scratch_size_bytes = usage_bytes; - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index 49daf6d723e5..ddd8045accf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -32,6 +32,7 @@ #define ATOM_ATI_MAGIC_PTR 0x30 #define ATOM_ATI_MAGIC " 761295520" #define ATOM_ROM_TABLE_PTR 0x48 +#define ATOM_ROM_PART_NUMBER_PTR 0x6E #define ATOM_ROM_MAGIC "ATOM" #define ATOM_ROM_MAGIC_PTR 4 @@ -151,7 +152,6 @@ bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t uint8_t *frev, uint8_t *crev, uint16_t *data_start); bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t *frev, uint8_t *crev); -int amdgpu_atom_allocate_fb_scratch(struct atom_context *ctx); #include "atom-types.h" #include "atombios.h" #include "ObjectID.h" diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index f97ecb49972e..11ccda83d767 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -3681,6 +3681,40 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table, return ret; } +static void ci_save_default_power_profile(struct amdgpu_device *adev) +{ + struct ci_power_info *pi = ci_get_pi(adev); + struct SMU7_Discrete_GraphicsLevel *levels = + pi->smc_state_table.GraphicsLevel; + uint32_t min_level = 0; + + pi->default_gfx_power_profile.activity_threshold = + be16_to_cpu(levels[0].ActivityLevel); + pi->default_gfx_power_profile.up_hyst = levels[0].UpH; + pi->default_gfx_power_profile.down_hyst = levels[0].DownH; + pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; + + pi->default_compute_power_profile = pi->default_gfx_power_profile; + pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; + + /* Optimize compute power profile: Use only highest + * 2 power levels (if more than 2 are available), Hysteresis: + * 0ms up, 5ms down + */ + if (pi->smc_state_table.GraphicsDpmLevelCount > 2) + min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2; + else if (pi->smc_state_table.GraphicsDpmLevelCount == 2) + min_level = 1; + pi->default_compute_power_profile.min_sclk = + be32_to_cpu(levels[min_level].SclkFrequency); + + pi->default_compute_power_profile.up_hyst = 0; + pi->default_compute_power_profile.down_hyst = 5; + + pi->gfx_power_profile = pi->default_gfx_power_profile; + pi->compute_power_profile = pi->default_compute_power_profile; +} + static int ci_init_smc_table(struct amdgpu_device *adev) { struct ci_power_info *pi = ci_get_pi(adev); @@ -3826,6 +3860,8 @@ static int ci_init_smc_table(struct amdgpu_device *adev) if (ret) return ret; + ci_save_default_power_profile(adev); + return 0; } @@ -5804,9 +5840,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev) out: if (err) { - printk(KERN_ERR - "cik_smc: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("cik_smc: Failed to load firmware \"%s\"\n", fw_name); release_firmware(adev->pm.fw); adev->pm.fw = NULL; } @@ -6250,11 +6284,13 @@ static int ci_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, 230, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, + &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, 231, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, + &adev->pm.dpm.thermal.irq); if (ret) return ret; @@ -6688,6 +6724,260 @@ static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value) return 0; } +static int ci_dpm_get_power_profile_state(struct amdgpu_device *adev, + struct amd_pp_profile *query) +{ + struct ci_power_info *pi = ci_get_pi(adev); + + if (!pi || !query) + return -EINVAL; + + if (query->type == AMD_PP_GFX_PROFILE) + memcpy(query, &pi->gfx_power_profile, + sizeof(struct amd_pp_profile)); + else if (query->type == AMD_PP_COMPUTE_PROFILE) + memcpy(query, &pi->compute_power_profile, + sizeof(struct amd_pp_profile)); + else + return -EINVAL; + + return 0; +} + +static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev, + struct amd_pp_profile *request) +{ + struct ci_power_info *pi = ci_get_pi(adev); + struct ci_dpm_table *dpm_table = &(pi->dpm_table); + struct SMU7_Discrete_GraphicsLevel *levels = + pi->smc_state_table.GraphicsLevel; + uint32_t array = pi->dpm_table_start + + offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); + uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) * + SMU7_MAX_LEVELS_GRAPHICS; + uint32_t i; + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + levels[i].ActivityLevel = + cpu_to_be16(request->activity_threshold); + levels[i].EnabledForActivity = 1; + levels[i].UpH = request->up_hyst; + levels[i].DownH = request->down_hyst; + } + + return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels, + array_size, pi->sram_end); +} + +static void ci_find_min_clock_masks(struct amdgpu_device *adev, + uint32_t *sclk_mask, uint32_t *mclk_mask, + uint32_t min_sclk, uint32_t min_mclk) +{ + struct ci_power_info *pi = ci_get_pi(adev); + struct ci_dpm_table *dpm_table = &(pi->dpm_table); + uint32_t i; + + for (i = 0; i < dpm_table->sclk_table.count; i++) { + if (dpm_table->sclk_table.dpm_levels[i].enabled && + dpm_table->sclk_table.dpm_levels[i].value >= min_sclk) + *sclk_mask |= 1 << i; + } + + for (i = 0; i < dpm_table->mclk_table.count; i++) { + if (dpm_table->mclk_table.dpm_levels[i].enabled && + dpm_table->mclk_table.dpm_levels[i].value >= min_mclk) + *mclk_mask |= 1 << i; + } +} + +static int ci_set_power_profile_state(struct amdgpu_device *adev, + struct amd_pp_profile *request) +{ + struct ci_power_info *pi = ci_get_pi(adev); + int tmp_result, result = 0; + uint32_t sclk_mask = 0, mclk_mask = 0; + + tmp_result = ci_freeze_sclk_mclk_dpm(adev); + if (tmp_result) { + DRM_ERROR("Failed to freeze SCLK MCLK DPM!"); + result = tmp_result; + } + + tmp_result = ci_populate_requested_graphic_levels(adev, + request); + if (tmp_result) { + DRM_ERROR("Failed to populate requested graphic levels!"); + result = tmp_result; + } + + tmp_result = ci_unfreeze_sclk_mclk_dpm(adev); + if (tmp_result) { + DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!"); + result = tmp_result; + } + + ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask, + request->min_sclk, request->min_mclk); + + if (sclk_mask) { + if (!pi->sclk_dpm_key_disabled) + amdgpu_ci_send_msg_to_smc_with_parameter( + adev, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask. + sclk_dpm_enable_mask & + sclk_mask); + } + + if (mclk_mask) { + if (!pi->mclk_dpm_key_disabled) + amdgpu_ci_send_msg_to_smc_with_parameter( + adev, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + pi->dpm_level_enable_mask. + mclk_dpm_enable_mask & + mclk_mask); + } + + + return result; +} + +static int ci_dpm_set_power_profile_state(struct amdgpu_device *adev, + struct amd_pp_profile *request) +{ + struct ci_power_info *pi = ci_get_pi(adev); + int ret = -1; + + if (!pi || !request) + return -EINVAL; + + if (adev->pm.dpm.forced_level != + AMD_DPM_FORCED_LEVEL_AUTO) + return -EINVAL; + + if (request->min_sclk || + request->min_mclk || + request->activity_threshold || + request->up_hyst || + request->down_hyst) { + if (request->type == AMD_PP_GFX_PROFILE) + memcpy(&pi->gfx_power_profile, request, + sizeof(struct amd_pp_profile)); + else if (request->type == AMD_PP_COMPUTE_PROFILE) + memcpy(&pi->compute_power_profile, request, + sizeof(struct amd_pp_profile)); + else + return -EINVAL; + + if (request->type == pi->current_power_profile) + ret = ci_set_power_profile_state( + adev, + request); + } else { + /* set power profile if it exists */ + switch (request->type) { + case AMD_PP_GFX_PROFILE: + ret = ci_set_power_profile_state( + adev, + &pi->gfx_power_profile); + break; + case AMD_PP_COMPUTE_PROFILE: + ret = ci_set_power_profile_state( + adev, + &pi->compute_power_profile); + break; + default: + return -EINVAL; + } + } + + if (!ret) + pi->current_power_profile = request->type; + + return 0; +} + +static int ci_dpm_reset_power_profile_state(struct amdgpu_device *adev, + struct amd_pp_profile *request) +{ + struct ci_power_info *pi = ci_get_pi(adev); + + if (!pi || !request) + return -EINVAL; + + if (request->type == AMD_PP_GFX_PROFILE) { + pi->gfx_power_profile = pi->default_gfx_power_profile; + return ci_dpm_set_power_profile_state(adev, + &pi->gfx_power_profile); + } else if (request->type == AMD_PP_COMPUTE_PROFILE) { + pi->compute_power_profile = + pi->default_compute_power_profile; + return ci_dpm_set_power_profile_state(adev, + &pi->compute_power_profile); + } else + return -EINVAL; +} + +static int ci_dpm_switch_power_profile(struct amdgpu_device *adev, + enum amd_pp_profile_type type) +{ + struct ci_power_info *pi = ci_get_pi(adev); + struct amd_pp_profile request = {0}; + + if (!pi) + return -EINVAL; + + if (pi->current_power_profile != type) { + request.type = type; + return ci_dpm_set_power_profile_state(adev, &request); + } + + return 0; +} + +static int ci_dpm_read_sensor(struct amdgpu_device *adev, int idx, + void *value, int *size) +{ + u32 activity_percent = 50; + int ret; + + /* size must be at least 4 bytes for all sensors */ + if (*size < 4) + return -EINVAL; + + switch (idx) { + case AMDGPU_PP_SENSOR_GFX_SCLK: + *((uint32_t *)value) = ci_get_average_sclk_freq(adev); + *size = 4; + return 0; + case AMDGPU_PP_SENSOR_GFX_MCLK: + *((uint32_t *)value) = ci_get_average_mclk_freq(adev); + *size = 4; + return 0; + case AMDGPU_PP_SENSOR_GPU_TEMP: + *((uint32_t *)value) = ci_dpm_get_temp(adev); + *size = 4; + return 0; + case AMDGPU_PP_SENSOR_GPU_LOAD: + ret = ci_read_smc_soft_register(adev, + offsetof(SMU7_SoftRegisters, + AverageGraphicsA), + &activity_percent); + if (ret == 0) { + activity_percent += 0x80; + activity_percent >>= 8; + activity_percent = + activity_percent > 100 ? 100 : activity_percent; + } + *((uint32_t *)value) = activity_percent; + *size = 4; + return 0; + default: + return -EINVAL; + } +} + const struct amd_ip_funcs ci_dpm_ip_funcs = { .name = "ci_dpm", .early_init = ci_dpm_early_init, @@ -6730,6 +7020,11 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = { .set_mclk_od = ci_dpm_set_mclk_od, .check_state_equal = ci_check_state_equal, .get_vce_clock_state = amdgpu_get_vce_clock_state, + .get_power_profile_state = ci_dpm_get_power_profile_state, + .set_power_profile_state = ci_dpm_set_power_profile_state, + .reset_power_profile_state = ci_dpm_reset_power_profile_state, + .switch_power_profile = ci_dpm_switch_power_profile, + .read_sensor = ci_dpm_read_sensor, }; static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h index 91be2996ae7c..84cbc9c45f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h @@ -295,6 +295,13 @@ struct ci_power_info { bool fan_is_controlled_by_smc; u32 t_min; u32 fan_ctrl_default_mode; + + /* power profile */ + struct amd_pp_profile gfx_power_profile; + struct amd_pp_profile compute_power_profile; + struct amd_pp_profile default_gfx_power_profile; + struct amd_pp_profile default_compute_power_profile; + enum amd_pp_profile_type current_power_profile; }; #define CISLANDS_VOLTAGE_CONTROL_NONE 0x0 diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index c4d4b35e54ec..9d33e5641419 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1212,6 +1212,11 @@ static int cik_asic_reset(struct amdgpu_device *adev) return r; } +static u32 cik_get_config_memsize(struct amdgpu_device *adev) +{ + return RREG32(mmCONFIG_MEMSIZE); +} + static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock, u32 cntl_reg, u32 status_reg) { @@ -1641,6 +1646,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, + .get_config_memsize = &cik_get_config_memsize, }; static int cik_common_early_init(void *handle) @@ -1779,6 +1785,8 @@ static int cik_common_early_init(void *handle) return -EINVAL; } + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + amdgpu_get_pcie_info(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 319b32cdea84..c57c3f18af01 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -248,8 +248,9 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; - entry->src_data = dw[1] & 0xfffffff; + entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vm_id = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 810bba533975..c216e16826c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -142,9 +142,7 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) } out: if (err) { - printk(KERN_ERR - "cik_sdma: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("cik_sdma: Failed to load firmware \"%s\"\n", fw_name); for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); adev->sdma.instance[i].fw = NULL; @@ -160,7 +158,7 @@ out: * * Get the current rptr from the hardware (CIK+). */ -static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) { u32 rptr; @@ -176,7 +174,7 @@ static uint32_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (CIK+). */ -static uint32_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; @@ -196,7 +194,8 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -227,7 +226,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, u32 extra_bits = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8); + cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ @@ -434,7 +433,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); /* enable DMA RB */ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], @@ -750,14 +749,14 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, */ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0); ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ ib->ptr[ib->length_dw++] = upper_32_bits(addr); ib->ptr[ib->length_dw++] = incr; /* increment size */ @@ -924,17 +923,20 @@ static int cik_sdma_sw_init(void *handle) } /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, + &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, + &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1211,6 +1213,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), + .support_64bit_ptrs = false, .get_rptr = cik_sdma_ring_get_rptr, .get_wptr = cik_sdma_ring_get_wptr, .set_wptr = cik_sdma_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 6cbd913fd12e..6a9e38a3d2a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -502,7 +502,7 @@ # define SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 6 #define SDMA_OPCODE_WRITE 2 # define SDMA_WRITE_SUB_OPCODE_LINEAR 0 -# define SDMA_WRTIE_SUB_OPCODE_TILED 1 +# define SDMA_WRITE_SUB_OPCODE_TILED 1 #define SDMA_OPCODE_INDIRECT_BUFFER 4 #define SDMA_OPCODE_FENCE 5 #define SDMA_OPCODE_TRAP 6 diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h new file mode 100644 index 000000000000..18fd01f3e4b2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h @@ -0,0 +1,941 @@ + +/* +*************************************************************************************************** +* +* Trade secret of Advanced Micro Devices, Inc. +* Copyright (c) 2010 Advanced Micro Devices, Inc. (unpublished) +* +* All rights reserved. This notice is intended as a precaution against inadvertent publication and +* does not imply publication or any waiver of confidentiality. The year included in the foregoing +* notice is the year of creation of the work. +* +*************************************************************************************************** +*/ +/** +*************************************************************************************************** +* @brief gfx9 Clearstate Definitions +*************************************************************************************************** +* +* Do not edit! This is a machine-generated file! +* +*/ + +static const unsigned int gfx9_SECT_CONTEXT_def_1[] = +{ + 0x00000000, // DB_RENDER_CONTROL + 0x00000000, // DB_COUNT_CONTROL + 0x00000000, // DB_DEPTH_VIEW + 0x00000000, // DB_RENDER_OVERRIDE + 0x00000000, // DB_RENDER_OVERRIDE2 + 0x00000000, // DB_HTILE_DATA_BASE + 0x00000000, // DB_HTILE_DATA_BASE_HI + 0x00000000, // DB_DEPTH_SIZE + 0x00000000, // DB_DEPTH_BOUNDS_MIN + 0x00000000, // DB_DEPTH_BOUNDS_MAX + 0x00000000, // DB_STENCIL_CLEAR + 0x00000000, // DB_DEPTH_CLEAR + 0x00000000, // PA_SC_SCREEN_SCISSOR_TL + 0x40004000, // PA_SC_SCREEN_SCISSOR_BR + 0x00000000, // DB_Z_INFO + 0x00000000, // DB_STENCIL_INFO + 0x00000000, // DB_Z_READ_BASE + 0x00000000, // DB_Z_READ_BASE_HI + 0x00000000, // DB_STENCIL_READ_BASE + 0x00000000, // DB_STENCIL_READ_BASE_HI + 0x00000000, // DB_Z_WRITE_BASE + 0x00000000, // DB_Z_WRITE_BASE_HI + 0x00000000, // DB_STENCIL_WRITE_BASE + 0x00000000, // DB_STENCIL_WRITE_BASE_HI + 0x00000000, // DB_DFSM_CONTROL + 0x00000000, // DB_RENDER_FILTER + 0x00000000, // DB_Z_INFO2 + 0x00000000, // DB_STENCIL_INFO2 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // TA_BC_BASE_ADDR + 0x00000000, // TA_BC_BASE_ADDR_HI + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // COHER_DEST_BASE_HI_0 + 0x00000000, // COHER_DEST_BASE_HI_1 + 0x00000000, // COHER_DEST_BASE_HI_2 + 0x00000000, // COHER_DEST_BASE_HI_3 + 0x00000000, // COHER_DEST_BASE_2 + 0x00000000, // COHER_DEST_BASE_3 + 0x00000000, // PA_SC_WINDOW_OFFSET + 0x80000000, // PA_SC_WINDOW_SCISSOR_TL + 0x40004000, // PA_SC_WINDOW_SCISSOR_BR + 0x0000ffff, // PA_SC_CLIPRECT_RULE + 0x00000000, // PA_SC_CLIPRECT_0_TL + 0x40004000, // PA_SC_CLIPRECT_0_BR + 0x00000000, // PA_SC_CLIPRECT_1_TL + 0x40004000, // PA_SC_CLIPRECT_1_BR + 0x00000000, // PA_SC_CLIPRECT_2_TL + 0x40004000, // PA_SC_CLIPRECT_2_BR + 0x00000000, // PA_SC_CLIPRECT_3_TL + 0x40004000, // PA_SC_CLIPRECT_3_BR + 0xaa99aaaa, // PA_SC_EDGERULE + 0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET + 0xffffffff, // CB_TARGET_MASK + 0xffffffff, // CB_SHADER_MASK + 0x80000000, // PA_SC_GENERIC_SCISSOR_TL + 0x40004000, // PA_SC_GENERIC_SCISSOR_BR + 0x00000000, // COHER_DEST_BASE_0 + 0x00000000, // COHER_DEST_BASE_1 + 0x80000000, // PA_SC_VPORT_SCISSOR_0_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_0_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_1_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_1_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_2_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_2_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_3_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_3_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_4_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_4_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_5_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_5_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_6_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_6_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_7_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_7_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_8_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_8_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_9_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_9_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_10_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_10_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_11_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_11_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_12_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_12_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_13_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_13_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_14_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_14_BR + 0x80000000, // PA_SC_VPORT_SCISSOR_15_TL + 0x40004000, // PA_SC_VPORT_SCISSOR_15_BR + 0x00000000, // PA_SC_VPORT_ZMIN_0 + 0x3f800000, // PA_SC_VPORT_ZMAX_0 + 0x00000000, // PA_SC_VPORT_ZMIN_1 + 0x3f800000, // PA_SC_VPORT_ZMAX_1 + 0x00000000, // PA_SC_VPORT_ZMIN_2 + 0x3f800000, // PA_SC_VPORT_ZMAX_2 + 0x00000000, // PA_SC_VPORT_ZMIN_3 + 0x3f800000, // PA_SC_VPORT_ZMAX_3 + 0x00000000, // PA_SC_VPORT_ZMIN_4 + 0x3f800000, // PA_SC_VPORT_ZMAX_4 + 0x00000000, // PA_SC_VPORT_ZMIN_5 + 0x3f800000, // PA_SC_VPORT_ZMAX_5 + 0x00000000, // PA_SC_VPORT_ZMIN_6 + 0x3f800000, // PA_SC_VPORT_ZMAX_6 + 0x00000000, // PA_SC_VPORT_ZMIN_7 + 0x3f800000, // PA_SC_VPORT_ZMAX_7 + 0x00000000, // PA_SC_VPORT_ZMIN_8 + 0x3f800000, // PA_SC_VPORT_ZMAX_8 + 0x00000000, // PA_SC_VPORT_ZMIN_9 + 0x3f800000, // PA_SC_VPORT_ZMAX_9 + 0x00000000, // PA_SC_VPORT_ZMIN_10 + 0x3f800000, // PA_SC_VPORT_ZMAX_10 + 0x00000000, // PA_SC_VPORT_ZMIN_11 + 0x3f800000, // PA_SC_VPORT_ZMAX_11 + 0x00000000, // PA_SC_VPORT_ZMIN_12 + 0x3f800000, // PA_SC_VPORT_ZMAX_12 + 0x00000000, // PA_SC_VPORT_ZMIN_13 + 0x3f800000, // PA_SC_VPORT_ZMAX_13 + 0x00000000, // PA_SC_VPORT_ZMIN_14 + 0x3f800000, // PA_SC_VPORT_ZMAX_14 + 0x00000000, // PA_SC_VPORT_ZMIN_15 + 0x3f800000, // PA_SC_VPORT_ZMAX_15 +}; +static const unsigned int gfx9_SECT_CONTEXT_def_2[] = +{ + 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL + 0x00000000, // PA_SC_TILE_STEERING_OVERRIDE + 0x00000000, // CP_PERFMON_CNTX_CNTL + 0x00000000, // CP_RINGID + 0x00000000, // CP_VMID + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SC_RIGHT_VERT_GRID + 0x00000000, // PA_SC_LEFT_VERT_GRID + 0x00000000, // PA_SC_HORIZ_GRID + 0x00000000, // PA_SC_FOV_WINDOW_LR + 0x00000000, // PA_SC_FOV_WINDOW_TB + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX + 0, // HOLE + 0x00000000, // CB_BLEND_RED + 0x00000000, // CB_BLEND_GREEN + 0x00000000, // CB_BLEND_BLUE + 0x00000000, // CB_BLEND_ALPHA + 0x00000000, // CB_DCC_CONTROL + 0, // HOLE + 0x00000000, // DB_STENCIL_CONTROL + 0x01000000, // DB_STENCILREFMASK + 0x01000000, // DB_STENCILREFMASK_BF + 0, // HOLE + 0x00000000, // PA_CL_VPORT_XSCALE + 0x00000000, // PA_CL_VPORT_XOFFSET + 0x00000000, // PA_CL_VPORT_YSCALE + 0x00000000, // PA_CL_VPORT_YOFFSET + 0x00000000, // PA_CL_VPORT_ZSCALE + 0x00000000, // PA_CL_VPORT_ZOFFSET + 0x00000000, // PA_CL_VPORT_XSCALE_1 + 0x00000000, // PA_CL_VPORT_XOFFSET_1 + 0x00000000, // PA_CL_VPORT_YSCALE_1 + 0x00000000, // PA_CL_VPORT_YOFFSET_1 + 0x00000000, // PA_CL_VPORT_ZSCALE_1 + 0x00000000, // PA_CL_VPORT_ZOFFSET_1 + 0x00000000, // PA_CL_VPORT_XSCALE_2 + 0x00000000, // PA_CL_VPORT_XOFFSET_2 + 0x00000000, // PA_CL_VPORT_YSCALE_2 + 0x00000000, // PA_CL_VPORT_YOFFSET_2 + 0x00000000, // PA_CL_VPORT_ZSCALE_2 + 0x00000000, // PA_CL_VPORT_ZOFFSET_2 + 0x00000000, // PA_CL_VPORT_XSCALE_3 + 0x00000000, // PA_CL_VPORT_XOFFSET_3 + 0x00000000, // PA_CL_VPORT_YSCALE_3 + 0x00000000, // PA_CL_VPORT_YOFFSET_3 + 0x00000000, // PA_CL_VPORT_ZSCALE_3 + 0x00000000, // PA_CL_VPORT_ZOFFSET_3 + 0x00000000, // PA_CL_VPORT_XSCALE_4 + 0x00000000, // PA_CL_VPORT_XOFFSET_4 + 0x00000000, // PA_CL_VPORT_YSCALE_4 + 0x00000000, // PA_CL_VPORT_YOFFSET_4 + 0x00000000, // PA_CL_VPORT_ZSCALE_4 + 0x00000000, // PA_CL_VPORT_ZOFFSET_4 + 0x00000000, // PA_CL_VPORT_XSCALE_5 + 0x00000000, // PA_CL_VPORT_XOFFSET_5 + 0x00000000, // PA_CL_VPORT_YSCALE_5 + 0x00000000, // PA_CL_VPORT_YOFFSET_5 + 0x00000000, // PA_CL_VPORT_ZSCALE_5 + 0x00000000, // PA_CL_VPORT_ZOFFSET_5 + 0x00000000, // PA_CL_VPORT_XSCALE_6 + 0x00000000, // PA_CL_VPORT_XOFFSET_6 + 0x00000000, // PA_CL_VPORT_YSCALE_6 + 0x00000000, // PA_CL_VPORT_YOFFSET_6 + 0x00000000, // PA_CL_VPORT_ZSCALE_6 + 0x00000000, // PA_CL_VPORT_ZOFFSET_6 + 0x00000000, // PA_CL_VPORT_XSCALE_7 + 0x00000000, // PA_CL_VPORT_XOFFSET_7 + 0x00000000, // PA_CL_VPORT_YSCALE_7 + 0x00000000, // PA_CL_VPORT_YOFFSET_7 + 0x00000000, // PA_CL_VPORT_ZSCALE_7 + 0x00000000, // PA_CL_VPORT_ZOFFSET_7 + 0x00000000, // PA_CL_VPORT_XSCALE_8 + 0x00000000, // PA_CL_VPORT_XOFFSET_8 + 0x00000000, // PA_CL_VPORT_YSCALE_8 + 0x00000000, // PA_CL_VPORT_YOFFSET_8 + 0x00000000, // PA_CL_VPORT_ZSCALE_8 + 0x00000000, // PA_CL_VPORT_ZOFFSET_8 + 0x00000000, // PA_CL_VPORT_XSCALE_9 + 0x00000000, // PA_CL_VPORT_XOFFSET_9 + 0x00000000, // PA_CL_VPORT_YSCALE_9 + 0x00000000, // PA_CL_VPORT_YOFFSET_9 + 0x00000000, // PA_CL_VPORT_ZSCALE_9 + 0x00000000, // PA_CL_VPORT_ZOFFSET_9 + 0x00000000, // PA_CL_VPORT_XSCALE_10 + 0x00000000, // PA_CL_VPORT_XOFFSET_10 + 0x00000000, // PA_CL_VPORT_YSCALE_10 + 0x00000000, // PA_CL_VPORT_YOFFSET_10 + 0x00000000, // PA_CL_VPORT_ZSCALE_10 + 0x00000000, // PA_CL_VPORT_ZOFFSET_10 + 0x00000000, // PA_CL_VPORT_XSCALE_11 + 0x00000000, // PA_CL_VPORT_XOFFSET_11 + 0x00000000, // PA_CL_VPORT_YSCALE_11 + 0x00000000, // PA_CL_VPORT_YOFFSET_11 + 0x00000000, // PA_CL_VPORT_ZSCALE_11 + 0x00000000, // PA_CL_VPORT_ZOFFSET_11 + 0x00000000, // PA_CL_VPORT_XSCALE_12 + 0x00000000, // PA_CL_VPORT_XOFFSET_12 + 0x00000000, // PA_CL_VPORT_YSCALE_12 + 0x00000000, // PA_CL_VPORT_YOFFSET_12 + 0x00000000, // PA_CL_VPORT_ZSCALE_12 + 0x00000000, // PA_CL_VPORT_ZOFFSET_12 + 0x00000000, // PA_CL_VPORT_XSCALE_13 + 0x00000000, // PA_CL_VPORT_XOFFSET_13 + 0x00000000, // PA_CL_VPORT_YSCALE_13 + 0x00000000, // PA_CL_VPORT_YOFFSET_13 + 0x00000000, // PA_CL_VPORT_ZSCALE_13 + 0x00000000, // PA_CL_VPORT_ZOFFSET_13 + 0x00000000, // PA_CL_VPORT_XSCALE_14 + 0x00000000, // PA_CL_VPORT_XOFFSET_14 + 0x00000000, // PA_CL_VPORT_YSCALE_14 + 0x00000000, // PA_CL_VPORT_YOFFSET_14 + 0x00000000, // PA_CL_VPORT_ZSCALE_14 + 0x00000000, // PA_CL_VPORT_ZOFFSET_14 + 0x00000000, // PA_CL_VPORT_XSCALE_15 + 0x00000000, // PA_CL_VPORT_XOFFSET_15 + 0x00000000, // PA_CL_VPORT_YSCALE_15 + 0x00000000, // PA_CL_VPORT_YOFFSET_15 + 0x00000000, // PA_CL_VPORT_ZSCALE_15 + 0x00000000, // PA_CL_VPORT_ZOFFSET_15 + 0x00000000, // PA_CL_UCP_0_X + 0x00000000, // PA_CL_UCP_0_Y + 0x00000000, // PA_CL_UCP_0_Z + 0x00000000, // PA_CL_UCP_0_W + 0x00000000, // PA_CL_UCP_1_X + 0x00000000, // PA_CL_UCP_1_Y + 0x00000000, // PA_CL_UCP_1_Z + 0x00000000, // PA_CL_UCP_1_W + 0x00000000, // PA_CL_UCP_2_X + 0x00000000, // PA_CL_UCP_2_Y + 0x00000000, // PA_CL_UCP_2_Z + 0x00000000, // PA_CL_UCP_2_W + 0x00000000, // PA_CL_UCP_3_X + 0x00000000, // PA_CL_UCP_3_Y + 0x00000000, // PA_CL_UCP_3_Z + 0x00000000, // PA_CL_UCP_3_W + 0x00000000, // PA_CL_UCP_4_X + 0x00000000, // PA_CL_UCP_4_Y + 0x00000000, // PA_CL_UCP_4_Z + 0x00000000, // PA_CL_UCP_4_W + 0x00000000, // PA_CL_UCP_5_X + 0x00000000, // PA_CL_UCP_5_Y + 0x00000000, // PA_CL_UCP_5_Z + 0x00000000, // PA_CL_UCP_5_W + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_CNTL_0 + 0x00000000, // SPI_PS_INPUT_CNTL_1 + 0x00000000, // SPI_PS_INPUT_CNTL_2 + 0x00000000, // SPI_PS_INPUT_CNTL_3 + 0x00000000, // SPI_PS_INPUT_CNTL_4 + 0x00000000, // SPI_PS_INPUT_CNTL_5 + 0x00000000, // SPI_PS_INPUT_CNTL_6 + 0x00000000, // SPI_PS_INPUT_CNTL_7 + 0x00000000, // SPI_PS_INPUT_CNTL_8 + 0x00000000, // SPI_PS_INPUT_CNTL_9 + 0x00000000, // SPI_PS_INPUT_CNTL_10 + 0x00000000, // SPI_PS_INPUT_CNTL_11 + 0x00000000, // SPI_PS_INPUT_CNTL_12 + 0x00000000, // SPI_PS_INPUT_CNTL_13 + 0x00000000, // SPI_PS_INPUT_CNTL_14 + 0x00000000, // SPI_PS_INPUT_CNTL_15 + 0x00000000, // SPI_PS_INPUT_CNTL_16 + 0x00000000, // SPI_PS_INPUT_CNTL_17 + 0x00000000, // SPI_PS_INPUT_CNTL_18 + 0x00000000, // SPI_PS_INPUT_CNTL_19 + 0x00000000, // SPI_PS_INPUT_CNTL_20 + 0x00000000, // SPI_PS_INPUT_CNTL_21 + 0x00000000, // SPI_PS_INPUT_CNTL_22 + 0x00000000, // SPI_PS_INPUT_CNTL_23 + 0x00000000, // SPI_PS_INPUT_CNTL_24 + 0x00000000, // SPI_PS_INPUT_CNTL_25 + 0x00000000, // SPI_PS_INPUT_CNTL_26 + 0x00000000, // SPI_PS_INPUT_CNTL_27 + 0x00000000, // SPI_PS_INPUT_CNTL_28 + 0x00000000, // SPI_PS_INPUT_CNTL_29 + 0x00000000, // SPI_PS_INPUT_CNTL_30 + 0x00000000, // SPI_PS_INPUT_CNTL_31 + 0x00000000, // SPI_VS_OUT_CONFIG + 0, // HOLE + 0x00000000, // SPI_PS_INPUT_ENA + 0x00000000, // SPI_PS_INPUT_ADDR + 0x00000000, // SPI_INTERP_CONTROL_0 + 0x00000002, // SPI_PS_IN_CONTROL + 0, // HOLE + 0x00000000, // SPI_BARYC_CNTL + 0, // HOLE + 0x00000000, // SPI_TMPRING_SIZE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SPI_SHADER_POS_FORMAT + 0x00000000, // SPI_SHADER_Z_FORMAT + 0x00000000, // SPI_SHADER_COL_FORMAT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // SX_PS_DOWNCONVERT + 0x00000000, // SX_BLEND_OPT_EPSILON + 0x00000000, // SX_BLEND_OPT_CONTROL + 0x00000000, // SX_MRT0_BLEND_OPT + 0x00000000, // SX_MRT1_BLEND_OPT + 0x00000000, // SX_MRT2_BLEND_OPT + 0x00000000, // SX_MRT3_BLEND_OPT + 0x00000000, // SX_MRT4_BLEND_OPT + 0x00000000, // SX_MRT5_BLEND_OPT + 0x00000000, // SX_MRT6_BLEND_OPT + 0x00000000, // SX_MRT7_BLEND_OPT + 0x00000000, // CB_BLEND0_CONTROL + 0x00000000, // CB_BLEND1_CONTROL + 0x00000000, // CB_BLEND2_CONTROL + 0x00000000, // CB_BLEND3_CONTROL + 0x00000000, // CB_BLEND4_CONTROL + 0x00000000, // CB_BLEND5_CONTROL + 0x00000000, // CB_BLEND6_CONTROL + 0x00000000, // CB_BLEND7_CONTROL + 0x00000000, // CB_MRT0_EPITCH + 0x00000000, // CB_MRT1_EPITCH + 0x00000000, // CB_MRT2_EPITCH + 0x00000000, // CB_MRT3_EPITCH + 0x00000000, // CB_MRT4_EPITCH + 0x00000000, // CB_MRT5_EPITCH + 0x00000000, // CB_MRT6_EPITCH + 0x00000000, // CB_MRT7_EPITCH +}; +static const unsigned int gfx9_SECT_CONTEXT_def_3[] = +{ + 0x00000000, // PA_CL_POINT_X_RAD + 0x00000000, // PA_CL_POINT_Y_RAD + 0x00000000, // PA_CL_POINT_SIZE + 0x00000000, // PA_CL_POINT_CULL_RAD +}; +static const unsigned int gfx9_SECT_CONTEXT_def_4[] = +{ + 0x00000000, // DB_DEPTH_CONTROL + 0x00000000, // DB_EQAA + 0x00000000, // CB_COLOR_CONTROL + 0x00000000, // DB_SHADER_CONTROL + 0x00090000, // PA_CL_CLIP_CNTL + 0x00000004, // PA_SU_SC_MODE_CNTL + 0x00000000, // PA_CL_VTE_CNTL + 0x00000000, // PA_CL_VS_OUT_CNTL + 0x00000000, // PA_CL_NANINF_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_CNTL + 0x00000000, // PA_SU_LINE_STIPPLE_SCALE + 0x00000000, // PA_SU_PRIM_FILTER_CNTL + 0x00000000, // PA_SU_SMALL_PRIM_FILTER_CNTL + 0x00000000, // PA_CL_OBJPRIM_ID_CNTL + 0x00000000, // PA_CL_NGG_CNTL + 0x00000000, // PA_SU_OVER_RASTERIZATION_CNTL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // PA_SU_POINT_SIZE + 0x00000000, // PA_SU_POINT_MINMAX + 0x00000000, // PA_SU_LINE_CNTL + 0x00000000, // PA_SC_LINE_STIPPLE + 0x00000000, // VGT_OUTPUT_PATH_CNTL + 0x00000000, // VGT_HOS_CNTL + 0x00000000, // VGT_HOS_MAX_TESS_LEVEL + 0x00000000, // VGT_HOS_MIN_TESS_LEVEL + 0x00000000, // VGT_HOS_REUSE_DEPTH + 0x00000000, // VGT_GROUP_PRIM_TYPE + 0x00000000, // VGT_GROUP_FIRST_DECR + 0x00000000, // VGT_GROUP_DECR + 0x00000000, // VGT_GROUP_VECT_0_CNTL + 0x00000000, // VGT_GROUP_VECT_1_CNTL + 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL + 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL + 0x00000000, // VGT_GS_MODE + 0x00000000, // VGT_GS_ONCHIP_CNTL + 0x00000000, // PA_SC_MODE_CNTL_0 + 0x00000000, // PA_SC_MODE_CNTL_1 + 0x00000000, // VGT_ENHANCE + 0x00000100, // VGT_GS_PER_ES + 0x00000080, // VGT_ES_PER_GS + 0x00000002, // VGT_GS_PER_VS + 0x00000000, // VGT_GSVS_RING_OFFSET_1 + 0x00000000, // VGT_GSVS_RING_OFFSET_2 + 0x00000000, // VGT_GSVS_RING_OFFSET_3 + 0x00000000, // VGT_GS_OUT_PRIM_TYPE + 0x00000000, // IA_ENHANCE +}; +static const unsigned int gfx9_SECT_CONTEXT_def_5[] = +{ + 0x00000000, // WD_ENHANCE + 0x00000000, // VGT_PRIMITIVEID_EN +}; +static const unsigned int gfx9_SECT_CONTEXT_def_6[] = +{ + 0x00000000, // VGT_PRIMITIVEID_RESET +}; +static const unsigned int gfx9_SECT_CONTEXT_def_7[] = +{ + 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP + 0x00000000, // VGT_DRAW_PAYLOAD_CNTL + 0x00000000, // VGT_INDEX_PAYLOAD_CNTL + 0x00000000, // VGT_INSTANCE_STEP_RATE_0 + 0x00000000, // VGT_INSTANCE_STEP_RATE_1 + 0, // HOLE + 0x00000000, // VGT_ESGS_RING_ITEMSIZE + 0x00000000, // VGT_GSVS_RING_ITEMSIZE + 0x00000000, // VGT_REUSE_OFF + 0x00000000, // VGT_VTX_CNT_EN + 0x00000000, // DB_HTILE_SURFACE + 0x00000000, // DB_SRESULTS_COMPARE_STATE0 + 0x00000000, // DB_SRESULTS_COMPARE_STATE1 + 0x00000000, // DB_PRELOAD_CONTROL + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2 + 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3 + 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3 + 0, // HOLE + 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE + 0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE + 0, // HOLE + 0x00000000, // VGT_GS_MAX_VERT_OUT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0x00000000, // VGT_TESS_DISTRIBUTION + 0x00000000, // VGT_SHADER_STAGES_EN + 0x00000000, // VGT_LS_HS_CONFIG + 0x00000000, // VGT_GS_VERT_ITEMSIZE + 0x00000000, // VGT_GS_VERT_ITEMSIZE_1 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_2 + 0x00000000, // VGT_GS_VERT_ITEMSIZE_3 + 0x00000000, // VGT_TF_PARAM + 0x00000000, // DB_ALPHA_TO_MASK + 0x00000000, // VGT_DISPATCH_DRAW_INDEX + 0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL + 0x00000000, // PA_SU_POLY_OFFSET_CLAMP + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET + 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE + 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET + 0x00000000, // VGT_GS_INSTANCE_CNT + 0x00000000, // VGT_STRMOUT_CONFIG + 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG +}; +static const unsigned int gfx9_SECT_CONTEXT_def_8[] = +{ + 0x00000000, // PA_SC_CENTROID_PRIORITY_0 + 0x00000000, // PA_SC_CENTROID_PRIORITY_1 + 0x00001000, // PA_SC_LINE_CNTL + 0x00000000, // PA_SC_AA_CONFIG + 0x00000005, // PA_SU_VTX_CNTL + 0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ + 0x3f800000, // PA_CL_GB_VERT_DISC_ADJ + 0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ + 0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 + 0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 + 0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0 + 0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1 + 0x00000000, // PA_SC_SHADER_CONTROL + 0x00000003, // PA_SC_BINNER_CNTL_0 + 0x00000000, // PA_SC_BINNER_CNTL_1 + 0x00000000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL + 0x00000000, // PA_SC_NGG_MODE_CNTL + 0, // HOLE + 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL + 0x00000020, // VGT_OUT_DEALLOC_CNTL + 0x00000000, // CB_COLOR0_BASE + 0x00000000, // CB_COLOR0_BASE_EXT + 0x00000000, // CB_COLOR0_ATTRIB2 + 0x00000000, // CB_COLOR0_VIEW + 0x00000000, // CB_COLOR0_INFO + 0x00000000, // CB_COLOR0_ATTRIB + 0x00000000, // CB_COLOR0_DCC_CONTROL + 0x00000000, // CB_COLOR0_CMASK + 0x00000000, // CB_COLOR0_CMASK_BASE_EXT + 0x00000000, // CB_COLOR0_FMASK + 0x00000000, // CB_COLOR0_FMASK_BASE_EXT + 0x00000000, // CB_COLOR0_CLEAR_WORD0 + 0x00000000, // CB_COLOR0_CLEAR_WORD1 + 0x00000000, // CB_COLOR0_DCC_BASE + 0x00000000, // CB_COLOR0_DCC_BASE_EXT + 0x00000000, // CB_COLOR1_BASE + 0x00000000, // CB_COLOR1_BASE_EXT + 0x00000000, // CB_COLOR1_ATTRIB2 + 0x00000000, // CB_COLOR1_VIEW + 0x00000000, // CB_COLOR1_INFO + 0x00000000, // CB_COLOR1_ATTRIB + 0x00000000, // CB_COLOR1_DCC_CONTROL + 0x00000000, // CB_COLOR1_CMASK + 0x00000000, // CB_COLOR1_CMASK_BASE_EXT + 0x00000000, // CB_COLOR1_FMASK + 0x00000000, // CB_COLOR1_FMASK_BASE_EXT + 0x00000000, // CB_COLOR1_CLEAR_WORD0 + 0x00000000, // CB_COLOR1_CLEAR_WORD1 + 0x00000000, // CB_COLOR1_DCC_BASE + 0x00000000, // CB_COLOR1_DCC_BASE_EXT + 0x00000000, // CB_COLOR2_BASE + 0x00000000, // CB_COLOR2_BASE_EXT + 0x00000000, // CB_COLOR2_ATTRIB2 + 0x00000000, // CB_COLOR2_VIEW + 0x00000000, // CB_COLOR2_INFO + 0x00000000, // CB_COLOR2_ATTRIB + 0x00000000, // CB_COLOR2_DCC_CONTROL + 0x00000000, // CB_COLOR2_CMASK + 0x00000000, // CB_COLOR2_CMASK_BASE_EXT + 0x00000000, // CB_COLOR2_FMASK + 0x00000000, // CB_COLOR2_FMASK_BASE_EXT + 0x00000000, // CB_COLOR2_CLEAR_WORD0 + 0x00000000, // CB_COLOR2_CLEAR_WORD1 + 0x00000000, // CB_COLOR2_DCC_BASE + 0x00000000, // CB_COLOR2_DCC_BASE_EXT + 0x00000000, // CB_COLOR3_BASE + 0x00000000, // CB_COLOR3_BASE_EXT + 0x00000000, // CB_COLOR3_ATTRIB2 + 0x00000000, // CB_COLOR3_VIEW + 0x00000000, // CB_COLOR3_INFO + 0x00000000, // CB_COLOR3_ATTRIB + 0x00000000, // CB_COLOR3_DCC_CONTROL + 0x00000000, // CB_COLOR3_CMASK + 0x00000000, // CB_COLOR3_CMASK_BASE_EXT + 0x00000000, // CB_COLOR3_FMASK + 0x00000000, // CB_COLOR3_FMASK_BASE_EXT + 0x00000000, // CB_COLOR3_CLEAR_WORD0 + 0x00000000, // CB_COLOR3_CLEAR_WORD1 + 0x00000000, // CB_COLOR3_DCC_BASE + 0x00000000, // CB_COLOR3_DCC_BASE_EXT + 0x00000000, // CB_COLOR4_BASE + 0x00000000, // CB_COLOR4_BASE_EXT + 0x00000000, // CB_COLOR4_ATTRIB2 + 0x00000000, // CB_COLOR4_VIEW + 0x00000000, // CB_COLOR4_INFO + 0x00000000, // CB_COLOR4_ATTRIB + 0x00000000, // CB_COLOR4_DCC_CONTROL + 0x00000000, // CB_COLOR4_CMASK + 0x00000000, // CB_COLOR4_CMASK_BASE_EXT + 0x00000000, // CB_COLOR4_FMASK + 0x00000000, // CB_COLOR4_FMASK_BASE_EXT + 0x00000000, // CB_COLOR4_CLEAR_WORD0 + 0x00000000, // CB_COLOR4_CLEAR_WORD1 + 0x00000000, // CB_COLOR4_DCC_BASE + 0x00000000, // CB_COLOR4_DCC_BASE_EXT + 0x00000000, // CB_COLOR5_BASE + 0x00000000, // CB_COLOR5_BASE_EXT + 0x00000000, // CB_COLOR5_ATTRIB2 + 0x00000000, // CB_COLOR5_VIEW + 0x00000000, // CB_COLOR5_INFO + 0x00000000, // CB_COLOR5_ATTRIB + 0x00000000, // CB_COLOR5_DCC_CONTROL + 0x00000000, // CB_COLOR5_CMASK + 0x00000000, // CB_COLOR5_CMASK_BASE_EXT + 0x00000000, // CB_COLOR5_FMASK + 0x00000000, // CB_COLOR5_FMASK_BASE_EXT + 0x00000000, // CB_COLOR5_CLEAR_WORD0 + 0x00000000, // CB_COLOR5_CLEAR_WORD1 + 0x00000000, // CB_COLOR5_DCC_BASE + 0x00000000, // CB_COLOR5_DCC_BASE_EXT + 0x00000000, // CB_COLOR6_BASE + 0x00000000, // CB_COLOR6_BASE_EXT + 0x00000000, // CB_COLOR6_ATTRIB2 + 0x00000000, // CB_COLOR6_VIEW + 0x00000000, // CB_COLOR6_INFO + 0x00000000, // CB_COLOR6_ATTRIB + 0x00000000, // CB_COLOR6_DCC_CONTROL + 0x00000000, // CB_COLOR6_CMASK + 0x00000000, // CB_COLOR6_CMASK_BASE_EXT + 0x00000000, // CB_COLOR6_FMASK + 0x00000000, // CB_COLOR6_FMASK_BASE_EXT + 0x00000000, // CB_COLOR6_CLEAR_WORD0 + 0x00000000, // CB_COLOR6_CLEAR_WORD1 + 0x00000000, // CB_COLOR6_DCC_BASE + 0x00000000, // CB_COLOR6_DCC_BASE_EXT + 0x00000000, // CB_COLOR7_BASE + 0x00000000, // CB_COLOR7_BASE_EXT + 0x00000000, // CB_COLOR7_ATTRIB2 + 0x00000000, // CB_COLOR7_VIEW + 0x00000000, // CB_COLOR7_INFO + 0x00000000, // CB_COLOR7_ATTRIB + 0x00000000, // CB_COLOR7_DCC_CONTROL + 0x00000000, // CB_COLOR7_CMASK + 0x00000000, // CB_COLOR7_CMASK_BASE_EXT + 0x00000000, // CB_COLOR7_FMASK + 0x00000000, // CB_COLOR7_FMASK_BASE_EXT + 0x00000000, // CB_COLOR7_CLEAR_WORD0 + 0x00000000, // CB_COLOR7_CLEAR_WORD1 + 0x00000000, // CB_COLOR7_DCC_BASE + 0x00000000, // CB_COLOR7_DCC_BASE_EXT +}; +static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = +{ + {gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 }, + {gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 }, + {gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 }, + {gfx9_SECT_CONTEXT_def_4, 0x0000a200, 157 }, + {gfx9_SECT_CONTEXT_def_5, 0x0000a2a0, 2 }, + {gfx9_SECT_CONTEXT_def_6, 0x0000a2a3, 1 }, + {gfx9_SECT_CONTEXT_def_7, 0x0000a2a5, 66 }, + {gfx9_SECT_CONTEXT_def_8, 0x0000a2f5, 155 }, + { 0, 0, 0 } +}; +static const struct cs_section_def gfx9_cs_data[] = { + { gfx9_SECT_CONTEXT_defs, SECT_CONTEXT }, + { 0, SECT_NONE } +}; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index fe7cbb24da7b..a5f294ebff5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -227,8 +227,9 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; - entry->src_data = dw[1] & 0xfffffff; + entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vm_id = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index d4452d8f76ca..ba98d35340a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1090,23 +1090,10 @@ static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); @@ -1214,14 +1201,14 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, { struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce10_wm_params wm_low, wm_high; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1236,7 +1223,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1275,7 +1262,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -2631,7 +2618,8 @@ static void dce_v10_0_cursor_reset(struct drm_crtc *crtc) } static int dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size) + u16 *blue, uint32_t size, + struct drm_modeset_acquire_ctx *ctx) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int i; @@ -2947,19 +2935,19 @@ static int dce_v10_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -3398,7 +3386,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); - switch (entry->src_data) { + switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v10_0_crtc_vblank_int_ack(adev, crtc); @@ -3421,7 +3409,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, break; default: - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); break; } @@ -3435,12 +3423,12 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev, uint32_t disp_int, mask; unsigned hpd; - if (entry->src_data >= adev->mode_info.num_hpd) { - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + if (entry->src_data[0] >= adev->mode_info.num_hpd) { + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); return 0; } - hpd = entry->src_data; + hpd = entry->src_data[0]; disp_int = RREG32(interrupt_status_offsets[hpd].reg); mask = interrupt_status_offsets[hpd].hpd; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 5b24e89552ec..e59bc42df18c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1059,23 +1059,10 @@ static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); @@ -1183,14 +1170,14 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, { struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce10_wm_params wm_low, wm_high; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1205,7 +1192,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1244,7 +1231,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -2651,7 +2638,8 @@ static void dce_v11_0_cursor_reset(struct drm_crtc *crtc) } static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size) + u16 *blue, uint32_t size, + struct drm_modeset_acquire_ctx *ctx) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int i; @@ -3007,19 +2995,19 @@ static int dce_v11_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -3462,7 +3450,7 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); - switch (entry->src_data) { + switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v11_0_crtc_vblank_int_ack(adev, crtc); @@ -3485,7 +3473,7 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, break; default: - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); break; } @@ -3499,12 +3487,12 @@ static int dce_v11_0_hpd_irq(struct amdgpu_device *adev, uint32_t disp_int, mask; unsigned hpd; - if (entry->src_data >= adev->mode_info.num_hpd) { - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + if (entry->src_data[0] >= adev->mode_info.num_hpd) { + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); return 0; } - hpd = entry->src_data; + hpd = entry->src_data[0]; disp_int = RREG32(interrupt_status_offsets[hpd].reg); mask = interrupt_status_offsets[hpd].hpd; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 809aa94a0cc1..307269bda4fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -861,23 +861,10 @@ static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); @@ -986,7 +973,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce6_wm_params wm_low, wm_high; u32 dram_channels; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 priority_a_mark = 0, priority_b_mark = 0; @@ -996,8 +983,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, fixed20_12 a, b, c; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); priority_a_cnt = 0; priority_b_cnt = 0; @@ -1016,7 +1003,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1043,7 +1030,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1998,7 +1985,8 @@ static void dce_v6_0_cursor_reset(struct drm_crtc *crtc) } static int dce_v6_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size) + u16 *blue, uint32_t size, + struct drm_modeset_acquire_ctx *ctx) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int i; @@ -2295,19 +2283,19 @@ static int dce_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -2592,7 +2580,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); - switch (entry->src_data) { + switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) WREG32(mmVBLANK_STATUS + crtc_offsets[crtc], VBLANK_ACK); @@ -2613,7 +2601,7 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: D%d vline\n", crtc + 1); break; default: - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); break; } @@ -2703,12 +2691,12 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, uint32_t disp_int, mask, tmp; unsigned hpd; - if (entry->src_data >= adev->mode_info.num_hpd) { - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + if (entry->src_data[0] >= adev->mode_info.num_hpd) { + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); return 0; } - hpd = entry->src_data; + hpd = entry->src_data[0]; disp_int = RREG32(interrupt_status_offsets[hpd].reg); mask = interrupt_status_offsets[hpd].hpd; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index d2590d75aa11..6df7a28e8aac 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -974,23 +974,10 @@ static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm) a.full = dfixed_const(available_bandwidth); b.full = dfixed_const(wm->num_heads); a.full = dfixed_div(a, b); + tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512); + tmp = min(dfixed_trunc(a), tmp); - b.full = dfixed_const(mc_latency + 512); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(b, c); - - c.full = dfixed_const(dmif_size); - b.full = dfixed_div(c, b); - - tmp = min(dfixed_trunc(a), dfixed_trunc(b)); - - b.full = dfixed_const(1000); - c.full = dfixed_const(wm->disp_clk); - b.full = dfixed_div(c, b); - c.full = dfixed_const(wm->bytes_per_pixel); - b.full = dfixed_mul(b, c); - - lb_fill_bw = min(tmp, dfixed_trunc(b)); + lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000); a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel); b.full = dfixed_const(1000); @@ -1098,14 +1085,14 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, { struct drm_display_mode *mode = &amdgpu_crtc->base.mode; struct dce8_wm_params wm_low, wm_high; - u32 pixel_period; + u32 active_time; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { - pixel_period = 1000000 / (u32)mode->clock; - line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535); + active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock; + line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1120,7 +1107,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, wm_high.disp_clk = mode->clock; wm_high.src_width = mode->crtc_hdisplay; - wm_high.active_time = mode->crtc_hdisplay * pixel_period; + wm_high.active_time = active_time; wm_high.blank_time = line_time - wm_high.active_time; wm_high.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -1159,7 +1146,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, wm_low.disp_clk = mode->clock; wm_low.src_width = mode->crtc_hdisplay; - wm_low.active_time = mode->crtc_hdisplay * pixel_period; + wm_low.active_time = active_time; wm_low.blank_time = line_time - wm_low.active_time; wm_low.interlaced = false; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -2482,7 +2469,8 @@ static void dce_v8_0_cursor_reset(struct drm_crtc *crtc) } static int dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, - u16 *blue, uint32_t size) + u16 *blue, uint32_t size, + struct drm_modeset_acquire_ctx *ctx) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int i; @@ -2794,19 +2782,19 @@ static int dce_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -3159,7 +3147,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); - switch (entry->src_data) { + switch (entry->src_data[0]) { case 0: /* vblank */ if (disp_int & interrupt_status_offsets[crtc].vblank) WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK); @@ -3180,7 +3168,7 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: D%d vline\n", crtc + 1); break; default: - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); break; } @@ -3270,12 +3258,12 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, uint32_t disp_int, mask, tmp; unsigned hpd; - if (entry->src_data >= adev->mode_info.num_hpd) { - DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); + if (entry->src_data[0] >= adev->mode_info.num_hpd) { + DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); return 0; } - hpd = entry->src_data; + hpd = entry->src_data[0]; disp_int = RREG32(interrupt_status_offsets[hpd].reg); mask = interrupt_status_offsets[hpd].hpd; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index e9a176891e13..81a24b6b4846 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -122,8 +122,9 @@ static void dce_virtual_stop_mc_access(struct amdgpu_device *adev, break; case CHIP_CARRIZO: case CHIP_STONEY: - case CHIP_POLARIS11: case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: dce_v11_0_disable_dce(adev); break; case CHIP_TOPAZ: @@ -164,7 +165,8 @@ static void dce_virtual_bandwidth_update(struct amdgpu_device *adev) } static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, - u16 *green, u16 *blue, uint32_t size) + u16 *green, u16 *blue, uint32_t size, + struct drm_modeset_acquire_ctx *ctx) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); int i; @@ -203,6 +205,9 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); unsigned type; + if (amdgpu_sriov_vf(adev)) + return; + switch (mode) { case DRM_MODE_DPMS_ON: amdgpu_crtc->enabled = true; @@ -463,7 +468,7 @@ static int dce_virtual_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_irq_add_id(adev, 229, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 229, &adev->crtc_irq); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 2086e7e68de4..4c4874fdf59f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -378,9 +378,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) out: if (err) { - printk(KERN_ERR - "gfx6: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("gfx6: Failed to load firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; release_firmware(adev->gfx.me_fw); @@ -1579,6 +1577,11 @@ static void gfx_v6_0_setup_spi(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +static void gfx_v6_0_config_init(struct amdgpu_device *adev) +{ + adev->gfx.config.double_offchip_lds_buf = 1; +} + static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) { u32 gb_addr_config = 0; @@ -1736,6 +1739,7 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) gfx_v6_0_setup_spi(adev); gfx_v6_0_get_cu_info(adev); + gfx_v6_0_config_init(adev); WREG32(mmCP_QUEUE_THRESHOLDS, ((0x16 << CP_QUEUE_THRESHOLDS__ROQ_IB1_START__SHIFT) | (0x2b << CP_QUEUE_THRESHOLDS__ROQ_IB2_START__SHIFT))); @@ -2188,12 +2192,12 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) return 0; } -static u32 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v6_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) +static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2211,7 +2215,7 @@ static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } @@ -2220,10 +2224,10 @@ static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->gfx.compute_ring[0]) { - WREG32(mmCP_RB1_WPTR, ring->wptr); + WREG32(mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB1_WPTR); } else if (ring == &adev->gfx.compute_ring[1]) { - WREG32(mmCP_RB2_WPTR, ring->wptr); + WREG32(mmCP_RB2_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB2_WPTR); } else { BUG(); @@ -3238,15 +3242,15 @@ static int gfx_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); if (r) return r; @@ -3304,10 +3308,6 @@ static int gfx_v6_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_unref(&adev->gds.oa_gfx_bo); - amdgpu_bo_unref(&adev->gds.gws_gfx_bo); - amdgpu_bo_unref(&adev->gds.gds_gfx_bo); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -3627,6 +3627,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { .type = AMDGPU_RING_TYPE_GFX, .align_mask = 0xff, .nop = 0x80000000, + .support_64bit_ptrs = false, .get_rptr = gfx_v6_0_ring_get_rptr, .get_wptr = gfx_v6_0_ring_get_wptr, .set_wptr = gfx_v6_0_ring_set_wptr_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 1f9354541f29..8a8bc2fe6f2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -972,9 +972,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) out: if (err) { - printk(KERN_ERR - "gfx7: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name); release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; release_firmware(adev->gfx.me_fw); @@ -1876,6 +1874,11 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } +static void gfx_v7_0_config_init(struct amdgpu_device *adev) +{ + adev->gfx.config.double_offchip_lds_buf = 1; +} + /** * gfx_v7_0_gpu_init - setup the 3D engine * @@ -1886,7 +1889,8 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) */ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) { - u32 tmp, sh_mem_cfg; + u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; + u32 tmp; int i; WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); @@ -1899,6 +1903,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) gfx_v7_0_setup_rb(adev); gfx_v7_0_get_cu_info(adev); + gfx_v7_0_config_init(adev); /* set HW defaults for 3D engine */ WREG32(mmCP_MEQ_THRESHOLDS, @@ -1916,15 +1921,32 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); + sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, DEFAULT_MTYPE, + MTYPE_NC); + sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, APE1_MTYPE, + MTYPE_UC); + sh_mem_cfg = REG_SET_FIELD(sh_mem_cfg, SH_MEM_CONFIG, PRIVATE_ATC, 0); + + sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, + SWIZZLE_ENABLE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + ELEMENT_SIZE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + INDEX_STRIDE, 3); mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { + for (i = 0; i < adev->vm_manager.num_ids; i++) { + if (i == 0) + sh_mem_base = 0; + else + sh_mem_base = adev->mc.shared_aperture_start >> 48; cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_cfg); WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); - WREG32(mmSH_MEM_BASES, 0); + WREG32(mmSH_MEM_BASES, sh_mem_base); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -2607,7 +2629,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); @@ -2636,12 +2658,12 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) return 0; } -static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2652,11 +2674,11 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } -static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->wptr_offs]; @@ -2667,8 +2689,8 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } /** @@ -3138,7 +3160,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->queue_state.cp_hqd_pq_wptr = ring->wptr; + mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); @@ -4647,17 +4669,19 @@ static int gfx_v7_0_sw_init(void *handle) int i, r; /* EOP Event */ - r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, + &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, + &adev->gfx.priv_inst_irq); if (r) return r; @@ -5184,6 +5208,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .type = AMDGPU_RING_TYPE_GFX, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_gfx, .set_wptr = gfx_v7_0_ring_set_wptr_gfx, @@ -5214,6 +5239,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .type = AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_compute, .set_wptr = gfx_v7_0_ring_set_wptr_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 67afc901905c..dad8a4cd1b37 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -659,6 +659,8 @@ static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); +static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev); +static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -1038,7 +1040,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) } } - if (adev->firmware.smu_load) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; info->fw = adev->gfx.pfp_fw; @@ -1375,13 +1377,12 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { + struct amdgpu_kiq *kiq = &adev->gfx.kiq; int r = 0; - if (amdgpu_sriov_vf(adev)) { - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); - if (r) - return r; - } + r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + if (r) + return r; ring->adev = NULL; ring->ring_obj = NULL; @@ -1395,8 +1396,8 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, ring->pipe = 1; } - irq->data = ring; ring->queue = 0; + ring->eop_gpu_addr = kiq->eop_gpu_addr; sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); @@ -1405,15 +1406,11 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, return r; } - static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, struct amdgpu_irq_src *irq) { - if (amdgpu_sriov_vf(ring->adev)) - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); - + amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); amdgpu_ring_fini(ring); - irq->data = NULL; } #define MEC_HPD_SIZE 2048 @@ -1475,7 +1472,6 @@ static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) struct amdgpu_kiq *kiq = &adev->gfx.kiq; amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); - kiq->eop_obj = NULL; } static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) @@ -1494,7 +1490,11 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) memset(hpd, 0, MEC_HPD_SIZE); + r = amdgpu_bo_reserve(kiq->eop_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); amdgpu_bo_kunmap(kiq->eop_obj); + amdgpu_bo_unreserve(kiq->eop_obj); return 0; } @@ -2079,22 +2079,24 @@ static int gfx_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* KIQ event */ - r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); if (r) return r; /* EOP Event */ - r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, + &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, + &adev->gfx.priv_inst_irq); if (r) return r; @@ -2120,17 +2122,6 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - r = gfx_v8_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } - - kiq = &adev->gfx.kiq; - r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); - if (r) - return r; - /* set up the gfx ring */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; @@ -2164,6 +2155,7 @@ static int gfx_v8_0_sw_init(void *handle) ring->me = 1; /* first MEC */ ring->pipe = i / 8; ring->queue = i % 8; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; /* type-2 packets are deprecated on MEC, use type-3 instead */ @@ -2173,6 +2165,24 @@ static int gfx_v8_0_sw_init(void *handle) return r; } + if (amdgpu_sriov_vf(adev)) { + r = gfx_v8_0_kiq_init(adev); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = &adev->gfx.kiq; + r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; + + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = gfx_v8_0_compute_mqd_sw_init(adev); + if (r) + return r; + } + /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, @@ -2214,9 +2224,13 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); - gfx_v8_0_kiq_fini(adev); + if (amdgpu_sriov_vf(adev)) { + gfx_v8_0_compute_mqd_sw_fini(adev); + gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + gfx_v8_0_kiq_fini(adev); + } + gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); gfx_v8_0_free_microcode(adev); @@ -3839,9 +3853,22 @@ static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } +static void gfx_v8_0_config_init(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + default: + adev->gfx.config.double_offchip_lds_buf = 1; + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + adev->gfx.config.double_offchip_lds_buf = 0; + break; + } +} + static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { - u32 tmp; + u32 tmp, sh_static_mem_cfg; int i; WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); @@ -3852,11 +3879,18 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) gfx_v8_0_tiling_mode_table_init(adev); gfx_v8_0_setup_rb(adev); gfx_v8_0_get_cu_info(adev); + gfx_v8_0_config_init(adev); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ + sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, + SWIZZLE_ENABLE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + ELEMENT_SIZE, 1); + sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, + INDEX_STRIDE, 3); mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { + for (i = 0; i < adev->vm_manager.num_ids; i++) { vi_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ if (i == 0) { @@ -3865,17 +3899,20 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32(mmSH_MEM_CONFIG, tmp); + WREG32(mmSH_MEM_BASES, 0); } else { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC); + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); WREG32(mmSH_MEM_CONFIG, tmp); + tmp = adev->mc.shared_aperture_start >> 48; + WREG32(mmSH_MEM_BASES, tmp); } WREG32(mmSH_MEM_APE1_BASE, 1); WREG32(mmSH_MEM_APE1_LIMIT, 0); - WREG32(mmSH_MEM_BASES, 0); + WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); } vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -4069,10 +4106,8 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) data = mmRLC_SRM_INDEX_CNTL_DATA_0; for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { if (unique_indices[i] != 0) { - amdgpu_mm_wreg(adev, temp + i, - unique_indices[i] & 0x3FFFF, false); - amdgpu_mm_wreg(adev, data + i, - unique_indices[i] >> 20, false); + WREG32(temp + i, unique_indices[i] & 0x3FFFF); + WREG32(data + i, unique_indices[i] >> 20); } } kfree(register_list_format); @@ -4218,7 +4253,7 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) gfx_v8_0_init_pg(adev); if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { /* legacy rlc firmware loading */ r = gfx_v8_0_rlc_load_microcode(adev); if (r) @@ -4464,7 +4499,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); ring->wptr = 0; - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); @@ -4510,6 +4545,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) } /* start the ring */ + amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); ring->ready = true; r = amdgpu_ring_test_ring(ring); @@ -4529,6 +4565,7 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) adev->gfx.compute_ring[i].ready = false; + adev->gfx.kiq.ring.ready = false; } udelay(50); } @@ -4596,6 +4633,8 @@ static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) amdgpu_bo_unref(&ring->mqd_obj); ring->mqd_obj = NULL; + ring->mqd_ptr = NULL; + ring->mqd_gpu_addr = 0; } } } @@ -4656,12 +4695,10 @@ static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, udelay(50); } -static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, - struct vi_mqd *mqd, - uint64_t mqd_gpu_addr, - uint64_t eop_gpu_addr, - struct amdgpu_ring *ring) +static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + struct vi_mqd *mqd = ring->mqd_ptr; uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; uint32_t tmp; @@ -4673,7 +4710,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, mqd->compute_static_thread_mgmt_se3 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; - eop_base_addr = eop_gpu_addr >> 8; + eop_base_addr = ring->eop_gpu_addr >> 8; mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); @@ -4685,14 +4722,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - - if (ring->use_doorbell) - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 1); - else - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_EN, 0); + tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), + CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, + ring->use_doorbell ? 1 : 0); mqd->cp_hqd_pq_doorbell_control = tmp; @@ -4702,8 +4735,8 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, mqd->cp_hqd_pq_wptr = 0; /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); /* set MQD vmid to 0 */ tmp = RREG32(mmCP_MQD_CONTROL); @@ -4776,17 +4809,14 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, return 0; } -static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, - struct vi_mqd *mqd, - struct amdgpu_ring *ring) +static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) { - uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + struct vi_mqd *mqd = ring->mqd_ptr; int j; /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); @@ -4798,10 +4828,10 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); /* disable the queue if it's active */ - if (RREG32(mmCP_HQD_ACTIVE) & 1) { + if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) + if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) break; udelay(1); } @@ -4858,44 +4888,55 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, /* activate the queue */ WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - if (ring->use_doorbell) { - tmp = RREG32(mmCP_PQ_STATUS); - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - WREG32(mmCP_PQ_STATUS, tmp); - } + if (ring->use_doorbell) + WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); return 0; } -static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, - struct vi_mqd *mqd, - u64 mqd_gpu_addr) +static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq; - uint64_t eop_gpu_addr; - bool is_kiq = false; - - if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) - is_kiq = true; + struct vi_mqd *mqd = ring->mqd_ptr; + bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; if (is_kiq) { - eop_gpu_addr = kiq->eop_gpu_addr; gfx_v8_0_kiq_setting(&kiq->ring); - } else - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + - ring->queue * MEC_HPD_SIZE; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + } else { + mqd_idx = ring - &adev->gfx.compute_ring[0]; + } - gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); + if (!adev->gfx.in_reset) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_mqd_init(ring); + if (is_kiq) + gfx_v8_0_kiq_init_register(ring); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); - if (is_kiq) - gfx_v8_0_kiq_init_register(adev, mqd, ring); + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + if (is_kiq) { + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_kiq_init_register(ring); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + } if (is_kiq) gfx_v8_0_kiq_enable(ring); @@ -4905,86 +4946,60 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, return 0; } -static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev) +static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; - int i; + int r = 0, i; - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); - ring->mqd_obj = NULL; - } + gfx_v8_0_cp_compute_enable(adev, true); ring = &adev->gfx.kiq.ring; - amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); - ring->mqd_obj = NULL; -} -static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - struct vi_mqd *mqd; - u64 mqd_gpu_addr; - u32 *buf; - int r = 0; + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &mqd_gpu_addr, (void **)&buf); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; + r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); + if (!r) { + r = gfx_v8_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; } - - /* init the mqd struct */ - memset(buf, 0, sizeof(struct vi_mqd)); - mqd = (struct vi_mqd *)buf; - - r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr); + amdgpu_bo_unreserve(ring->mqd_obj); if (r) - return r; - - amdgpu_bo_kunmap(ring->mqd_obj); - - return 0; -} + goto done; -static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int r, i; - - ring = &adev->gfx.kiq.ring; - r = gfx_v8_0_kiq_setup_queue(adev, ring); - if (r) - return r; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - r = gfx_v8_0_kiq_setup_queue(adev, ring); - if (r) - return r; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; } - gfx_v8_0_cp_compute_enable(adev, true); - for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); + if (!r) { + r = gfx_v8_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; } - ring = &adev->gfx.kiq.ring; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; - - return 0; +done: + return r; } static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) @@ -5185,7 +5200,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; - mqd->cp_hqd_pq_wptr = ring->wptr; + mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); @@ -5245,7 +5260,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) gfx_v8_0_enable_gui_idle_interrupt(adev, false); if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { /* legacy firmware loading */ r = gfx_v8_0_cp_gfx_load_microcode(adev); if (r) @@ -5329,7 +5344,6 @@ static int gfx_v8_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); if (amdgpu_sriov_vf(adev)) { - gfx_v8_0_kiq_free_queue(adev); pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } @@ -5448,19 +5462,18 @@ static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, { int i; + mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { - u32 tmp; - tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); - tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST, - DEQUEUE_REQ, 2); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp); + WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2); for (i = 0; i < adev->usec_timeout; i++) { if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) break; udelay(1); } } + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } static int gfx_v8_0_pre_soft_reset(void *handle) @@ -5566,11 +5579,13 @@ static int gfx_v8_0_soft_reset(void *handle) static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, struct amdgpu_ring *ring) { + mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); WREG32(mmCP_HQD_PQ_RPTR, 0); WREG32(mmCP_HQD_PQ_WPTR, 0); vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } static int gfx_v8_0_post_soft_reset(void *handle) @@ -5839,7 +5854,10 @@ static int gfx_v8_0_set_powergating_state(void *handle, enum amd_powergating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + bool enable = (state == AMD_PG_STATE_GATE); + + if (amdgpu_sriov_vf(adev)) + return 0; switch (adev->asic_type) { case CHIP_CARRIZO: @@ -5898,6 +5916,9 @@ static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_GFX_MGCG */ data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) @@ -6411,18 +6432,22 @@ static int gfx_v8_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_CARRIZO: case CHIP_STONEY: gfx_v8_0_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; case CHIP_TONGA: gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); break; case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); break; default: @@ -6431,12 +6456,12 @@ static int gfx_v8_0_set_clockgating_state(void *handle, return 0; } -static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } -static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -6453,10 +6478,10 @@ static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32(mmCP_RB0_WPTR, ring->wptr); + WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); (void)RREG32(mmCP_RB0_WPTR); } } @@ -6531,6 +6556,9 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vm_id << 24); + if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT) + control |= INDIRECT_BUFFER_PRE_ENB(1); + amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -6639,12 +6667,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* sync PFP to ME, otherwise we might get invalid PFP reads */ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); - /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */ - amdgpu_ring_insert_nop(ring, 128); } } -static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->wptr_offs]; } @@ -6654,8 +6680,8 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr; - WDOORBELL32(ring->doorbell_index, ring->wptr); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, @@ -6748,6 +6774,34 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); } +static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + return ret; +} + +static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) +{ + unsigned cur; + + BUG_ON(offset > ring->buf_mask); + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr & ring->buf_mask) - 1; + if (likely(cur > offset)) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; +} + + static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; @@ -6924,40 +6978,24 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, unsigned int type, enum amdgpu_interrupt_state state) { - uint32_t tmp, target; - struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data; + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); - - if (ring->me == 1) - target = mmCP_ME1_PIPE0_INT_CNTL; - else - target = mmCP_ME2_PIPE0_INT_CNTL; - target += ring->pipe; + BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); switch (type) { case AMDGPU_CP_KIQ_IRQ_DRIVER0: - if (state == AMDGPU_IRQ_STATE_DISABLE) { - tmp = RREG32(mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32(mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32(target, tmp); - } else { - tmp = RREG32(mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32(mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32(target, tmp); - } + WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, + state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); + if (ring->me == 1) + WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, + ring->pipe, + GENERIC2_INT_ENABLE, + state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); + else + WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, + ring->pipe, + GENERIC2_INT_ENABLE, + state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); break; default: BUG(); /* kiq only support GENERIC2_INT now */ @@ -6971,9 +7009,9 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data; + struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); + BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; @@ -7010,18 +7048,28 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .type = AMDGPU_RING_TYPE_GFX, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_gfx, .set_wptr = gfx_v8_0_ring_set_wptr_gfx, - .emit_frame_size = - 20 + /* gfx_v8_0_ring_emit_gds_switch */ - 7 + /* gfx_v8_0_ring_emit_hdp_flush */ - 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ - 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */ - 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ - 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */ - 2 + /* gfx_v8_ring_emit_sb */ - 3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */ + .emit_frame_size = /* maximum 215dw if count 16 IBs in */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + 19 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + the first COND_EXEC jump to the place just + prior to this double SWITCH_BUFFER */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, @@ -7036,12 +7084,15 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v8_ring_emit_sb, .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, + .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, + .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .type = AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute, @@ -7070,6 +7121,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { .type = AMDGPU_RING_TYPE_KIQ, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute, @@ -7083,8 +7135,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_kiq, - .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, @@ -7266,15 +7316,15 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c uint64_t ce_payload_addr; int cnt_ce; static union { - struct amdgpu_ce_ib_state regular; - struct amdgpu_ce_ib_state_chained_ib chained; + struct vi_ce_ib_state regular; + struct vi_ce_ib_state_chained_ib chained; } ce_payload = {}; if (ring->adev->virt.chained_ib_support) { - ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload); + ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; } else { - ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload); + ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; } @@ -7293,20 +7343,20 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c uint64_t de_payload_addr, gds_addr; int cnt_de; static union { - struct amdgpu_de_ib_state regular; - struct amdgpu_de_ib_state_chained_ib chained; + struct vi_de_ib_state regular; + struct vi_de_ib_state_chained_ib chained; } de_payload = {}; gds_addr = csa_addr + 4096; if (ring->adev->virt.chained_ib_support) { de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); - de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload); + de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; } else { de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); - de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, de_payload); + de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; } @@ -7319,3 +7369,68 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); } + +/* create MQD for each compute queue */ +static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r, i; + + /* create MQD for KIQ */ + ring = &adev->gfx.kiq.ring; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + + /* create MQD for each KCQ */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + } + + return 0; +} + +static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + kfree(adev->gfx.mec.mqd_backup[i]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); + } + + ring = &adev->gfx.kiq.ring; + kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c new file mode 100644 index 000000000000..a447b70841c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -0,0 +1,3916 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "soc15.h" +#include "soc15d.h" + +#include "vega10/soc15ip.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/vega10_enum.h" +#include "vega10/HDP/hdp_4_0_offset.h" + +#include "soc15_common.h" +#include "clearstate_gfx9.h" +#include "v9_structs.h" + +#define GFX9_NUM_GFX_RINGS 1 +#define GFX9_NUM_COMPUTE_RINGS 8 +#define GFX9_NUM_SE 4 +#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 + +MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); +MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vega10_me.bin"); +MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); +MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); + +static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = +{ + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)}, + {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE), + SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)} +}; + +static const u32 golden_settings_gc_9_0[] = +{ + SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, + SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff +}; + +static const u32 golden_settings_gc_9_0_vg10[] = +{ + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107, + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, + SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, + SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800, + SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007 +}; + +#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 + +static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); +static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); +static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); +static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); +static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info); +static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); +static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); + +static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + amdgpu_program_register_sequence(adev, + golden_settings_gc_9_0, + (const u32)ARRAY_SIZE(golden_settings_gc_9_0)); + amdgpu_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); + break; + default: + break; + } +} + +static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) +{ + adev->gfx.scratch.num_reg = 7; + adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); + adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; +} + +static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, + bool wc, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | + WRITE_DATA_DST_SEL(0) | + (wc ? WR_CONFIRM : 0)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, + int mem_space, int opt, uint32_t addr0, + uint32_t addr1, uint32_t ref, uint32_t mask, + uint32_t inv) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, + /* memory (1) or register (0) */ + (WAIT_REG_MEM_MEM_SPACE(mem_space) | + WAIT_REG_MEM_OPERATION(opt) | /* wait */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(eng_sel))); + + if (mem_space) + BUG_ON(addr0 & 0x3); /* Dword align */ + amdgpu_ring_write(ring, addr0); + amdgpu_ring_write(ring, addr1); + amdgpu_ring_write(ring, ref); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, inv); /* poll interval */ +} + +static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t scratch; + uint32_t tmp = 0; + unsigned i; + int r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", + ring->idx, scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + return r; +} + +static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + uint32_t scratch; + uint32_t tmp = 0; + long r; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err1; + } + ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); + ib.ptr[2] = 0xDEADBEEF; + ib.length_dw = 3; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err2; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + goto err2; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err2; + } + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } +err2: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err1: + amdgpu_gfx_scratch_free(adev, scratch); + return r; +} + +static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); + err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.pfp_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); + err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.me_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); + err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.ce_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = + le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; + info->fw = adev->gfx.mec_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT; + info->fw = adev->gfx.mec_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + + if (adev->gfx.mec2_fw) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; + info->fw = adev->gfx.mec2_fw; + header = (const struct common_firmware_header *)info->fw->data; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); + } + + } + +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + } + return err; +} + +static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) +{ + int r; + + if (adev->gfx.mec.hpd_eop_obj) { + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); + amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + + amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); + adev->gfx.mec.hpd_eop_obj = NULL; + } + if (adev->gfx.mec.mec_fw_obj) { + r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r); + amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + + amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj); + adev->gfx.mec.mec_fw_obj = NULL; + } +} + +#define MEC_HPD_SIZE 2048 + +static int gfx_v9_0_mec_init(struct amdgpu_device *adev) +{ + int r; + u32 *hpd; + const __le32 *fw_data; + unsigned fw_size; + u32 *fw; + + const struct gfx_firmware_header_v1_0 *mec_hdr; + + /* + * we assign only 1 pipe because all other pipes will + * be handled by KFD + */ + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe = 1; + adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + + if (adev->gfx.mec.hpd_eop_obj == NULL) { + r = amdgpu_bo_create(adev, + adev->gfx.mec.num_queue * MEC_HPD_SIZE, + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + &adev->gfx.mec.hpd_eop_obj); + if (r) { + dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); + return r; + } + } + + r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); + if (unlikely(r != 0)) { + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.hpd_eop_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); + if (r) { + dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + + memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size); + + amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); + amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; + + if (adev->gfx.mec.mec_fw_obj == NULL) { + r = amdgpu_bo_create(adev, + mec_hdr->header.ucode_size_bytes, + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, + &adev->gfx.mec.mec_fw_obj); + if (r) { + dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r); + return r; + } + } + + r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false); + if (unlikely(r != 0)) { + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT, + &adev->gfx.mec.mec_fw_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw); + if (r) { + dev_warn(adev->dev, "(%d) map firmware bo failed\n", r); + gfx_v9_0_mec_fini(adev); + return r; + } + memcpy(fw, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj); + amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj); + + + return 0; +} + +static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); +} + +static int gfx_v9_0_kiq_init(struct amdgpu_device *adev) +{ + int r; + u32 *hpd; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, + &kiq->eop_gpu_addr, (void **)&hpd); + if (r) { + dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); + return r; + } + + memset(hpd, 0, MEC_HPD_SIZE); + + r = amdgpu_bo_reserve(kiq->eop_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); + amdgpu_bo_kunmap(kiq->eop_obj); + amdgpu_bo_unreserve(kiq->eop_obj); + + return 0; +} + +static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + int r = 0; + + r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + if (r) + return r; + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + if (adev->gfx.mec2_fw) { + ring->me = 2; + ring->pipe = 0; + } else { + ring->me = 1; + ring->pipe = 1; + } + + irq->data = ring; + ring->queue = 0; + ring->eop_gpu_addr = kiq->eop_gpu_addr; + sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_init(adev, ring, 1024, + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); + if (r) + dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + + return r; +} +static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_ring_fini(ring); + irq->data = NULL; +} + +/* create MQD for each compute queue */ +static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r, i; + + /* create MQD for KIQ */ + ring = &adev->gfx.kiq.ring; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /*TODO: prepare MQD backup */ + } + + /* create MQD for each KCQ */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* TODO: prepare MQD backup */ + } + } + + return 0; +} + +static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); + } + + ring = &adev->gfx.kiq.ring; + amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); +} + +static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) +{ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (address << SQ_IND_INDEX__INDEX__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK)); + return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); +} + +static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, + uint32_t regno, uint32_t num, uint32_t *out) +{ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX), + (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | + (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | + (regno << SQ_IND_INDEX__INDEX__SHIFT) | + (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | + (SQ_IND_INDEX__FORCE_READ_MASK) | + (SQ_IND_INDEX__AUTO_INCR_MASK)); + while (num--) + *(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA)); +} + +static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +{ + /* type 1 wave data */ + dst[(*no_fields)++] = 1; + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); +} + +static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t start, + uint32_t size, uint32_t *dst) +{ + wave_read_regs( + adev, simd, wave, 0, + start + SQIND_WAVE_SGPRS_OFFSET, size, dst); +} + + +static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v9_0_select_se_sh, + .read_wave_data = &gfx_v9_0_read_wave_data, + .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, +}; + +static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + + adev->gfx.funcs = &gfx_v9_0_gfx_funcs; + + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->gfx.config.max_shader_engines = 4; + adev->gfx.config.max_tile_pipes = 8; //?? + adev->gfx.config.max_cu_per_sh = 16; + adev->gfx.config.max_sh_per_se = 1; + adev->gfx.config.max_backends_per_se = 4; + adev->gfx.config.max_texture_channel_caches = 16; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; + break; + default: + BUG(); + break; + } + + adev->gfx.config.gb_addr_config = gb_addr_config; + + adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_PIPES); + adev->gfx.config.gb_addr_config_fields.num_banks = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_BANKS); + adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + MAX_COMPRESSED_FRAGS); + adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_RB_PER_SE); + adev->gfx.config.gb_addr_config_fields.num_se = 1 << + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + NUM_SHADER_ENGINES); + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + + REG_GET_FIELD( + adev->gfx.config.gb_addr_config, + GB_ADDR_CONFIG, + PIPE_INTERLEAVE_SIZE)); +} + +static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, + struct amdgpu_ngg_buf *ngg_buf, + int size_se, + int default_size_se) +{ + int r; + + if (size_se < 0) { + dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se); + return -EINVAL; + } + size_se = size_se ? size_se : default_size_se; + + ngg_buf->size = size_se * GFX9_NUM_SE; + r = amdgpu_bo_create_kernel(adev, ngg_buf->size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &ngg_buf->bo, + &ngg_buf->gpu_addr, + NULL); + if (r) { + dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r); + return r; + } + ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo); + + return r; +} + +static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < NGG_BUF_MAX; i++) + amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo, + &adev->gfx.ngg.buf[i].gpu_addr, + NULL); + + memset(&adev->gfx.ngg.buf[0], 0, + sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); + + adev->gfx.ngg.init = false; + + return 0; +} + +static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_ngg || adev->gfx.ngg.init == true) + return 0; + + /* GDS reserve memory: 64 bytes alignment */ + adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); + adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; + adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; + adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base; + adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; + + /* Primitive Buffer */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM], + amdgpu_prim_buf_per_se, + 64 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Primitive Buffer\n"); + goto err; + } + + /* Position Buffer */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS], + amdgpu_pos_buf_per_se, + 256 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Position Buffer\n"); + goto err; + } + + /* Control Sideband */ + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL], + amdgpu_cntl_sb_buf_per_se, + 256); + if (r) { + dev_err(adev->dev, "Failed to create Control Sideband Buffer\n"); + goto err; + } + + /* Parameter Cache, not created by default */ + if (amdgpu_param_buf_per_se <= 0) + goto out; + + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM], + amdgpu_param_buf_per_se, + 512 * 1024); + if (r) { + dev_err(adev->dev, "Failed to create Parameter Cache\n"); + goto err; + } + +out: + adev->gfx.ngg.init = true; + return 0; +err: + gfx_v9_0_ngg_fini(adev); + return r; +} + +static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; + int r; + u32 data; + u32 size; + u32 base; + + if (!amdgpu_ngg) + return 0; + + /* Program buffer size */ + data = 0; + size = adev->gfx.ngg.buf[PRIM].size / 256; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); + + size = adev->gfx.ngg.buf[POS].size / 256; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data); + + data = 0; + size = adev->gfx.ngg.buf[CNTL].size / 256; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); + + size = adev->gfx.ngg.buf[PARAM].size / 1024; + data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data); + + /* Program buffer base address */ + base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data); + + base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data); + + base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data); + + base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data); + + base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data); + + base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); + WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data); + + /* Clear GDS reserved memory */ + r = amdgpu_ring_alloc(ring, 17); + if (r) { + DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[0].mem_size, + (adev->gds.mem.total_size + + adev->gfx.ngg.gds_reserve_size) >> + AMDGPU_GDS_SHIFT); + + amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); + amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | + PACKET3_DMA_DATA_SRC_SEL(2))); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size); + + + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[0].mem_size, 0); + + amdgpu_ring_commit(ring); + + return 0; +} + +static int gfx_v9_0_sw_init(void *handle) +{ + int i, r; + struct amdgpu_ring *ring; + struct amdgpu_kiq *kiq; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* KIQ event */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); + if (r) + return r; + + /* EOP Event */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); + if (r) + return r; + + /* Privileged reg */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184, + &adev->gfx.priv_reg_irq); + if (r) + return r; + + /* Privileged inst */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185, + &adev->gfx.priv_inst_irq); + if (r) + return r; + + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; + + gfx_v9_0_scratch_init(adev); + + r = gfx_v9_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load gfx firmware!\n"); + return r; + } + + r = gfx_v9_0_mec_init(adev); + if (r) { + DRM_ERROR("Failed to init MEC BOs!\n"); + return r; + } + + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->ring_obj = NULL; + sprintf(ring->name, "gfx"); + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); + if (r) + return r; + } + + /* set up the compute queues */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + unsigned irq_type; + + /* max 32 queues per MEC */ + if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { + DRM_ERROR("Too many (%d) compute rings!\n", i); + break; + } + ring = &adev->gfx.compute_ring[i]; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1; + ring->me = 1; /* first MEC */ + ring->pipe = i / 8; + ring->queue = i % 8; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); + sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + } + + if (amdgpu_sriov_vf(adev)) { + r = gfx_v9_0_kiq_init(adev); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = &adev->gfx.kiq; + r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; + + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = gfx_v9_0_compute_mqd_sw_init(adev); + if (r) + return r; + } + + /* reserve GDS, GWS and OA resource for gfx */ + r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + &adev->gds.gds_gfx_bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + &adev->gds.gws_gfx_bo, NULL, NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + &adev->gds.oa_gfx_bo, NULL, NULL); + if (r) + return r; + + adev->gfx.ce_ram_size = 0x8000; + + gfx_v9_0_gpu_early_init(adev); + + r = gfx_v9_0_ngg_init(adev); + if (r) + return r; + + return 0; +} + + +static int gfx_v9_0_sw_fini(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + + if (amdgpu_sriov_vf(adev)) { + gfx_v9_0_compute_mqd_sw_fini(adev); + gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + gfx_v9_0_kiq_fini(adev); + } + + gfx_v9_0_mec_fini(adev); + gfx_v9_0_ngg_fini(adev); + + return 0; +} + + +static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) +{ + /* TODO */ +} + +static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) +{ + u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); + + if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); + } else if (se_num == 0xffffffff) { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); + } else if (sh_num == 0xffffffff) { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + } else { + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); + } + WREG32( SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); +} + +static u32 gfx_v9_0_create_bitmask(u32 bit_width) +{ + return (u32)((1ULL << bit_width) - 1); +} + +static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE)); + data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE)); + + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; + data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; + + mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); + + return (~data) & mask; +} + +static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) +{ + int i, j; + u32 data; + u32 active_rbs = 0; + u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + data = gfx_v9_0_get_rb_active_bitmap(adev); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + adev->gfx.config.backend_enable_mask = active_rbs; + adev->gfx.config.num_rbs = hweight32(active_rbs); +} + +#define DEFAULT_SH_MEM_BASES (0x6000) +#define FIRST_COMPUTE_VMID (8) +#define LAST_COMPUTE_VMID (16) +static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) +{ + int i; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); + + sh_mem_config = SH_MEM_ADDRESS_MODE_64 | + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + + mutex_lock(&adev->srbm_mutex); + for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + soc15_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + } + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) +{ + u32 tmp; + int i; + + WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + + gfx_v9_0_tiling_mode_table_init(adev); + + gfx_v9_0_setup_rb(adev); + gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); + + /* XXX SH_MEM regs */ + /* where to put LDS, scratch, GPUVM in FSA64 space */ + mutex_lock(&adev->srbm_mutex); + for (i = 0; i < 16; i++) { + soc15_grbm_select(adev, 0, 0, 0, i); + /* CP and shaders */ + tmp = 0; + tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, + SH_MEM_ALIGNMENT_MODE_UNALIGNED); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0); + } + soc15_grbm_select(adev, 0, 0, 0, 0); + + mutex_unlock(&adev->srbm_mutex); + + gfx_v9_0_init_compute_vmid(adev); + + mutex_lock(&adev->grbm_idx_mutex); + /* + * making sure that the following register writes will be broadcasted + * to all the shaders + */ + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE), + (adev->gfx.config.sc_prim_fifo_size_frontend << + PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_prim_fifo_size_backend << + PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_hiz_tile_fifo_size << + PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | + (adev->gfx.config.sc_earlyz_tile_fifo_size << + PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); + mutex_unlock(&adev->grbm_idx_mutex); + +} + +static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) +{ + u32 i, j, k; + u32 mask; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + for (k = 0; k < adev->usec_timeout; k++) { + if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0) + break; + udelay(1); + } + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | + RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; + for (k = 0; k < adev->usec_timeout; k++) { + if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0) + break; + udelay(1); + } +} + +static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)); + + if (enable) + return; + + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp); +} + +void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) +{ + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + + tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp); + + gfx_v9_0_enable_gui_idle_interrupt(adev, false); + + gfx_v9_0_wait_for_rlc_serdes(adev); +} + +static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) +{ + WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + udelay(50); + WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); + udelay(50); +} + +static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) +{ +#ifdef AMDGPU_RLC_DEBUG_RETRY + u32 rlc_ucode_ver; +#endif + + WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + + /* carrizo do enable cp interrupt after cp inited */ + if (!(adev->flags & AMD_IS_APU)) + gfx_v9_0_enable_gui_idle_interrupt(adev, true); + + udelay(50); + +#ifdef AMDGPU_RLC_DEBUG_RETRY + /* RLC_GPM_GENERAL_6 : RLC Ucode version */ + rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6)); + if(rlc_ucode_ver == 0x108) { + DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", + rlc_ucode_ver, adev->gfx.rlc_fw_version); + /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, + * default is 0x9C4 to create a 100us interval */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4); + /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr + * to disable the page fault retry interrupts, default is + * 0x100 (256) */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100); + } +#endif +} + +static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_0 *hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.rlc_fw) + return -EINVAL; + + hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + amdgpu_ucode_print_rlc_hdr(&hdr->header); + + fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), + RLCG_UCODE_LOADING_START_ADDRESS); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version); + + return 0; +} + +static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) +{ + int r; + + if (amdgpu_sriov_vf(adev)) + return 0; + + gfx_v9_0_rlc_stop(adev); + + /* disable CG */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0); + + /* disable PG */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0); + + gfx_v9_0_rlc_reset(adev); + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy rlc firmware loading */ + r = gfx_v9_0_rlc_load_microcode(adev); + if (r) + return r; + } + + gfx_v9_0_rlc_start(adev); + + return 0; +} + +static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)); + + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); + tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); + if (!enable) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].ready = false; + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp); + udelay(50); +} + +static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v1_0 *pfp_hdr; + const struct gfx_firmware_header_v1_0 *ce_hdr; + const struct gfx_firmware_header_v1_0 *me_hdr; + const __le32 *fw_data; + unsigned i, fw_size; + + if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) + return -EINVAL; + + pfp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.pfp_fw->data; + ce_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.ce_fw->data; + me_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.me_fw->data; + + amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); + amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + + gfx_v9_0_cp_gfx_enable(adev, false); + + /* PFP */ + fw_data = (const __le32 *) + (adev->gfx.pfp_fw->data + + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version); + + /* CE */ + fw_data = (const __le32 *) + (adev->gfx.ce_fw->data + + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version); + + /* ME */ + fw_data = (const __le32 *) + (adev->gfx.me_fw->data + + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); + fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0); + for (i = 0; i < fw_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version); + + return 0; +} + +static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + /* pa_sc_raster_config/pa_sc_raster_config1 */ + count += 4; + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + int r, i; + + /* init the CP */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1); + + gfx_v9_0_cp_gfx_enable(adev, true); + + r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, 0x80000000); + amdgpu_ring_write(ring, 0x80000000); + + for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + amdgpu_ring_write(ring, + PACKET3(PACKET3_SET_CONTEXT_REG, + ext->reg_count)); + amdgpu_ring_write(ring, + ext->reg_index - PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + amdgpu_ring_write(ring, ext->extent[i]); + } + } + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); + + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); + amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); + amdgpu_ring_write(ring, 0x8000); + amdgpu_ring_write(ring, 0x8000); + + amdgpu_ring_commit(ring); + + return 0; +} + +static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 tmp; + u32 rb_bufsz; + u64 rb_addr, rptr_addr, wptr_gpu_addr; + + /* Set the write pointer delay */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0); + + /* set the RB to use vmid 0 */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0); + + /* Set ring buffer size */ + ring = &adev->gfx.gfx_ring[0]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); +#endif + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); + + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); + + /* set the wb address wether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr)); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL)); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp); + + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, ring->doorbell_index); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER), + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + + + /* start the ring */ + gfx_v9_0_cp_gfx_start(adev); + ring->ready = true; + + return 0; +} + +static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) +{ + int i; + + if (enable) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0); + } else { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), + (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].ready = false; + adev->gfx.kiq.ring.ready = false; + } + udelay(50); +} + +static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev) +{ + gfx_v9_0_cp_compute_enable(adev, true); + + return 0; +} + +static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) +{ + const struct gfx_firmware_header_v1_0 *mec_hdr; + const __le32 *fw_data; + unsigned i; + u32 tmp; + + if (!adev->gfx.mec_fw) + return -EINVAL; + + gfx_v9_0_cp_compute_enable(adev, false); + + mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); + tmp = 0; + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); + tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO), + adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI), + upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); + + /* MEC1 */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), + mec_hdr->jt_offset); + for (i = 0; i < mec_hdr->jt_size; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA), + le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR), + adev->gfx.mec_fw_version); + /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ + + return 0; +} + +static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + if (ring->mqd_obj) { + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); + + amdgpu_bo_unpin(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + amdgpu_bo_unref(&ring->mqd_obj); + ring->mqd_obj = NULL; + } + } +} + +static int gfx_v9_0_init_queue(struct amdgpu_ring *ring); + +static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + if (gfx_v9_0_init_queue(ring)) + dev_warn(adev->dev, "compute queue %d init failed!\n", i); + } + + r = gfx_v9_0_cp_compute_start(adev); + if (r) + return r; + + return 0; +} + +/* KIQ functions */ +static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp); + tmp |= 0x80; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp); +} + +static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring) +{ + amdgpu_ring_alloc(ring, 8); + /* set resources */ + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ + amdgpu_ring_write(ring, 0); /* queue mask hi */ + amdgpu_ring_write(ring, 0); /* gws mask lo */ + amdgpu_ring_write(ring, 0); /* gws mask hi */ + amdgpu_ring_write(ring, 0); /* oac mask */ + amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ + amdgpu_ring_commit(ring); + udelay(50); +} + +static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr, wptr_addr; + + mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + amdgpu_ring_alloc(kiq_ring, 8); + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + (0 << 4) | /* Queue_Sel */ + (0 << 8) | /* VMID */ + (ring->queue << 13 ) | + (ring->pipe << 16) | + ((ring->me == 1 ? 0 : 1) << 18) | + (0 << 21) | /*queue_type: normal compute queue */ + (1 << 24) | /* alloc format: all_on_one_pipe */ + (0 << 26) | /* engine_sel: compute */ + (1 << 29)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + amdgpu_ring_commit(kiq_ring); + udelay(50); +} + +static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + + eop_base_addr = ring->eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + else + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + tmp = 0; + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + mqd->cp_hqd_persistent_state = tmp; + + /* activate the queue */ + mqd->cp_hqd_active = 1; + + return 0; +} + +static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + int j; + + /* disable wptr polling */ + WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), + mqd->cp_hqd_eop_base_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), + mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), + mqd->cp_hqd_eop_control); + + /* enable doorbell? */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1)) + break; + udelay(1); + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), + mqd->cp_hqd_dequeue_request); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), + mqd->cp_hqd_pq_rptr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + mqd->cp_hqd_pq_wptr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), + mqd->cp_mqd_base_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), + mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), + mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), + mqd->cp_hqd_pq_base_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), + mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), + mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR), + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI), + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER), + (AMDGPU_DOORBELL64_KIQ *2) << 2); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER), + (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2); + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + mqd->cp_hqd_pq_wptr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), + mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), + mqd->cp_hqd_active); + + if (ring->use_doorbell) + WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + +static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct v9_mqd *mqd = ring->mqd_ptr; + bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; + + if (is_kiq) { + gfx_v9_0_kiq_setting(&kiq->ring); + } else { + mqd_idx = ring - &adev->gfx.compute_ring[0]; + } + + if (!adev->gfx.in_reset) { + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_0_mqd_init(ring); + if (is_kiq) + gfx_v9_0_kiq_init_register(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + } else { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + + /* reset ring buffer */ + ring->wptr = 0; + + if (is_kiq) { + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_0_kiq_init_register(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + } + + if (is_kiq) + gfx_v9_0_kiq_enable(ring); + else + gfx_v9_0_map_queue_enable(&kiq->ring, ring); + + return 0; +} + +static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v9_0_cp_compute_enable(adev, true); + + ring = &adev->gfx.kiq.ring; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto done; + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); + if (!r) { + r = gfx_v9_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; + } + amdgpu_bo_unreserve(ring->mqd_obj); + if (r) + goto done; + } + +done: + return r; +} + +static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) +{ + int r,i; + struct amdgpu_ring *ring; + + if (!(adev->flags & AMD_IS_APU)) + gfx_v9_0_enable_gui_idle_interrupt(adev, false); + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + /* legacy firmware loading */ + r = gfx_v9_0_cp_gfx_load_microcode(adev); + if (r) + return r; + + r = gfx_v9_0_cp_compute_load_microcode(adev); + if (r) + return r; + } + + r = gfx_v9_0_cp_gfx_resume(adev); + if (r) + return r; + + if (amdgpu_sriov_vf(adev)) + r = gfx_v9_0_kiq_resume(adev); + else + r = gfx_v9_0_cp_compute_resume(adev); + if (r) + return r; + + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + return r; + } + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) + ring->ready = false; + } + + if (amdgpu_sriov_vf(adev)) { + ring = &adev->gfx.kiq.ring; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) + ring->ready = false; + } + + gfx_v9_0_enable_gui_idle_interrupt(adev, true); + + return 0; +} + +static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) +{ + gfx_v9_0_cp_gfx_enable(adev, enable); + gfx_v9_0_cp_compute_enable(adev, enable); +} + +static int gfx_v9_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gfx_v9_0_init_golden_registers(adev); + + gfx_v9_0_gpu_init(adev); + + r = gfx_v9_0_rlc_resume(adev); + if (r) + return r; + + r = gfx_v9_0_cp_resume(adev); + if (r) + return r; + + r = gfx_v9_0_ngg_en(adev); + if (r) + return r; + + return r; +} + +static int gfx_v9_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + if (amdgpu_sriov_vf(adev)) { + pr_debug("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + gfx_v9_0_cp_enable(adev, false); + gfx_v9_0_rlc_stop(adev); + gfx_v9_0_cp_compute_fini(adev); + + return 0; +} + +static int gfx_v9_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return gfx_v9_0_hw_fini(adev); +} + +static int gfx_v9_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return gfx_v9_0_hw_init(adev); +} + +static bool gfx_v9_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)), + GRBM_STATUS, GUI_ACTIVE)) + return false; + else + return true; +} + +static int gfx_v9_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) & + GRBM_STATUS__GUI_ACTIVE_MASK; + + if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int gfx_v9_0_soft_reset(void *handle) +{ + u32 grbm_soft_reset = 0; + u32 tmp; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* GRBM_STATUS */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)); + if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | + GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | + GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | + GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | + GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | + GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); + } + + if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_CP, 1); + } + + /* GRBM_STATUS2 */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)); + if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, + GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); + + + if (grbm_soft_reset) { + /* stop the rlc */ + gfx_v9_0_rlc_stop(adev); + + /* Disable GFX parsing/prefetching */ + gfx_v9_0_cp_gfx_enable(adev, false); + + /* Disable MEC parsing/prefetching */ + gfx_v9_0_cp_compute_enable(adev, false); + + if (grbm_soft_reset) { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + tmp |= grbm_soft_reset; + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + + udelay(50); + + tmp &= ~grbm_soft_reset; + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET)); + } + + /* Wait a little for things to settle down */ + udelay(50); + } + return 0; +} + +static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + uint64_t clock; + + mutex_lock(&adev->gfx.gpu_clock_mutex); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1); + clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) | + ((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL); + mutex_unlock(&adev->gfx.gpu_clock_mutex); + return clock; +} + +static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, + uint32_t vmid, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) +{ + gds_base = gds_base >> AMDGPU_GDS_SHIFT; + gds_size = gds_size >> AMDGPU_GDS_SHIFT; + + gws_base = gws_base >> AMDGPU_GWS_SHIFT; + gws_size = gws_size >> AMDGPU_GWS_SHIFT; + + oa_base = oa_base >> AMDGPU_OA_SHIFT; + oa_size = oa_size >> AMDGPU_OA_SHIFT; + + /* GDS Base */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].mem_base, + gds_base); + + /* GDS Size */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].mem_size, + gds_size); + + /* GWS */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].gws, + gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); + + /* OA */ + gfx_v9_0_write_data_to_reg(ring, 0, false, + amdgpu_gds_reg_offset[vmid].oa, + (1 << (oa_size + oa_base)) - (1 << oa_base)); +} + +static int gfx_v9_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS; + gfx_v9_0_set_ring_funcs(adev); + gfx_v9_0_set_irq_funcs(adev); + gfx_v9_0_set_gds_init(adev); + gfx_v9_0_set_rlc_funcs(adev); + + return 0; +} + +static int gfx_v9_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); + if (r) + return r; + + r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); + if (r) + return r; + + return 0; +} + +static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + uint32_t rlc_setting, data; + unsigned i; + + if (adev->gfx.rlc.in_safe_mode) + return; + + /* if RLC is not enabled, do nothing */ + rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG)) { + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); + + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); + } + adev->gfx.rlc.in_safe_mode = true; + } +} + +static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + uint32_t rlc_setting, data; + + if (!adev->gfx.rlc.in_safe_mode) + return; + + /* if RLC is not enabled, do nothing */ + rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)); + if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { + /* + * Try to exit safe mode only if it is already in safe + * mode. + */ + data = RLC_SAFE_MODE__CMD_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data); + adev->gfx.rlc.in_safe_mode = false; + } +} + +static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + /* It is disabled by HW by default */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + /* 1 - RLC_CGTT_MGCG_OVERRIDE */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + + /* only for Vega10 & Raven1 */ + data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + + /* MGLS is a global flag to control all MGLS in GFX */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + /* 2 - RLC memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); + } + /* 3 - CP memory Light sleep */ + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); + } + } + } else { + /* 1 - MGCG_OVERRIDE */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + + /* 2 - disable MGLS in RLC */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { + data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data); + } + + /* 3 - disable MGLS in CP */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { + data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data); + } + } +} + +static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); + + /* Enable 3D CGCG/CGLS */ + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { + /* write cmd to clear cgcg/cgls ov */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + /* enable 3Dcgcg FSM(0x0020003f) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + } else { + /* Disable CGCG/CGLS */ + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + /* disable cgcg, cgls should be disabled */ + data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | + RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data); + } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); +} + +static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + adev->gfx.rlc.funcs->enter_safe_mode(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + /* unset CGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + else + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; + /* update CGCG and CGLS override bits */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data); + + /* enable cgcg FSM(0x0020003F) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) + data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); + + /* set IDLE_POLL_COUNT(0x00900100) */ + def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | + (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + } else { + def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + /* reset CGCG/CGLS bits */ + data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + /* disable cgcg and cgls in FSM */ + if (def != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data); + } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); +} + +static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + if (enable) { + /* CGCG/CGLS should be enabled after MGCG/MGLS + * === MGCG + MGLS === + */ + gfx_v9_0_update_medium_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v9_0_update_3d_clock_gating(adev, enable); + /* === CGCG + CGLS === */ + gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); + } else { + /* CGCG/CGLS should be disabled before MGCG/MGLS + * === CGCG + CGLS === + */ + gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); + /* === CGCG /CGLS for GFX 3D Only === */ + gfx_v9_0_update_3d_clock_gating(adev, enable); + /* === MGCG + MGLS === */ + gfx_v9_0_update_medium_grain_clock_gating(adev, enable); + } + return 0; +} + +static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { + .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode, + .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode +}; + +static int gfx_v9_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static int gfx_v9_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_VEGA10: + gfx_v9_0_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; +} + +static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_GFX_CGCG */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; + + /* AMD_CG_SUPPORT_GFX_RLC_LS */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL)); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_CP_LS */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL)); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; +} + +static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ +} + +static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); + } else { + wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR)); + wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32; + } + + return wptr; +} + +static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr)); + } +} + +static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + u32 ref_and_mask, reg_mem_engine; + struct nbio_hdp_flush_reg *nbio_hf_reg; + + if (ring->adev->asic_type == CHIP_VEGA10) + nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + switch (ring->me) { + case 1: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; + break; + case 2: + ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; + break; + default: + return; + } + reg_mem_engine = 0; + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + reg_mem_engine = 1; /* pfp */ + } + + gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, + nbio_hf_reg->hdp_flush_req_offset, + nbio_hf_reg->hdp_flush_done_offset, + ref_and_mask, ref_and_mask, 0x20); +} + +static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) +{ + gfx_v9_0_write_data_to_reg(ring, 0, true, + SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1); +} + +static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) +{ + u32 header, control = 0; + + if (ib->flags & AMDGPU_IB_FLAG_CE) + header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + + control |= ib->length_dw | (vm_id << 24); + + if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) + control |= INDIRECT_BUFFER_PRE_ENB(1); + + amdgpu_ring_write(ring, header); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +#define INDIRECT_BUFFER_VALID (1 << 23) + +static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) +{ + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); + + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, +#ifdef __BIG_ENDIAN + (2 << 0) | +#endif + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); +} + +static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; + + /* RELEASE_MEM - flush caches, send int */ + amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); + amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | + EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | + EOP_TC_MD_ACTION_EN | + EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | + EVENT_INDEX(5))); + amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); + + /* + * the address should be Qword aligned if 64bit write, Dword + * aligned if only send 32bit data low (discard data high) + */ + if (write64bit) + BUG_ON(addr & 0x7); + else + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, + lower_32_bits(addr), upper_32_bits(addr), + seq, 0xffffffff, 4); +} + +static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + unsigned eng = ring->idx; + unsigned i; + + pd_addr = pd_addr | 0x1; /* valid bit */ + /* now only use physical base address of PDE and valid */ + BUG_ON(pd_addr & 0xFFFF00000000003EULL); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; + + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_lo32 + + (2 * vm_id), + lower_32_bits(pd_addr)); + + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->ctx0_ptb_addr_hi32 + + (2 * vm_id), + upper_32_bits(pd_addr)); + + gfx_v9_0_write_data_to_reg(ring, usepfp, true, + hub->vm_inv_eng0_req + eng, req); + + /* wait for the invalidate to complete */ + gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + + eng, 0, 1 << vm_id, 1 << vm_id, 0x20); + } + + /* compute doesn't have PFP */ + if (usepfp) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); + } +} + +static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) +{ + return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ +} + +static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) +{ + u64 wptr; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) + wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); + else + BUG(); + return wptr; +} + +static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else{ + BUG(); /* only DOORBELL method supported on gfx9 now */ + } +} + +static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + +static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); +} + +static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) +{ + static struct v9_ce_ib_state ce_payload = {0}; + uint64_t csa_addr; + int cnt; + + cnt = (sizeof(ce_payload) >> 2) + 4 - 2; + csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); + amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); +} + +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) +{ + static struct v9_de_ib_state de_payload = {0}; + uint64_t csa_addr, gds_addr; + int cnt; + + csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; + gds_addr = csa_addr + 4096; + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + + cnt = (sizeof(de_payload) >> 2) + 4 - 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); + amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); +} + +static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_ring_emit_ce_meta(ring); + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs for GFX */ + dw2 |= 0x10002; + + /* set load_ce_ram if preamble presented */ + if (AMDGPU_PREAMBLE_IB_PRESENT & flags) + dw2 |= 0x10000000; + } else { + /* still load_ce_ram if this is the first time preamble presented + * although there is no context switch happens. + */ + if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) + dw2 |= 0x10000000; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_ring_emit_de_meta(ring); +} + +static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + return ret; +} + +static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) +{ + unsigned cur; + BUG_ON(offset > ring->buf_mask); + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr & ring->buf_mask) - 1; + if (likely(cur > offset)) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->ring_size>>2) - offset + cur; +} + +static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); +} + +static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); +} + +static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + TIME_STAMP_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } +} + +static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, + int me, int pipe, + enum amdgpu_interrupt_state state) +{ + u32 mec_int_cntl, mec_int_cntl_reg; + + /* + * amdgpu controls only pipe 0 of MEC1. That's why this function only + * handles the setting of interrupts for this specific pipe. All other + * pipes' interrupts are set by amdkfd. + */ + + if (me == 1) { + switch (pipe) { + case 0: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + } else { + DRM_DEBUG("invalid me %d\n", me); + return; + } + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 0); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, + TIME_STAMP_INT_ENABLE, 1); + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } +} + +static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + PRIV_REG_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; + default: + break; + } + + return 0; +} + +static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + case AMDGPU_IRQ_STATE_ENABLE: + WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, + PRIV_INSTR_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + default: + break; + } + + return 0; +} + +static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + switch (type) { + case AMDGPU_CP_IRQ_GFX_EOP: + gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); + break; + case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: + gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); + break; + default: + break; + } + return 0; +} + +static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int i; + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + + DRM_DEBUG("IH: CP EOP\n"); + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + /* Per-queue interrupt is supported for MEC starting from VI. + * The interrupt can only be enabled/disabled per pipe instead of per queue. + */ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) + amdgpu_fence_process(ring); + } + break; + } + return 0; +} + +static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal register access in command stream\n"); + schedule_work(&adev->reset_work); + return 0; +} + +static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in command stream\n"); + schedule_work(&adev->reset_work); + return 0; +} + +static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + uint32_t tmp, target; + struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data; + + BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); + + if (ring->me == 1) + target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); + else + target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); + target += ring->pipe; + + switch (type) { + case AMDGPU_CP_KIQ_IRQ_DRIVER0: + if (state == AMDGPU_IRQ_STATE_DISABLE) { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL)); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32(target, tmp); + } else { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL)); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32(target, tmp); + } + break; + default: + BUG(); /* kiq only support GENERIC2_INT now */ + break; + } + return 0; +} + +static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data; + + BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", + me_id, pipe_id, queue_id); + + amdgpu_fence_process(ring); + return 0; +} + +const struct amd_ip_funcs gfx_v9_0_ip_funcs = { + .name = "gfx_v9_0", + .early_init = gfx_v9_0_early_init, + .late_init = gfx_v9_0_late_init, + .sw_init = gfx_v9_0_sw_init, + .sw_fini = gfx_v9_0_sw_fini, + .hw_init = gfx_v9_0_hw_init, + .hw_fini = gfx_v9_0_hw_fini, + .suspend = gfx_v9_0_suspend, + .resume = gfx_v9_0_resume, + .is_idle = gfx_v9_0_is_idle, + .wait_for_idle = gfx_v9_0_wait_for_idle, + .soft_reset = gfx_v9_0_soft_reset, + .set_clockgating_state = gfx_v9_0_set_clockgating_state, + .set_powergating_state = gfx_v9_0_set_powergating_state, + .get_clockgating_state = gfx_v9_0_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { + .type = AMDGPU_RING_TYPE_GFX, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_0_ring_get_rptr_gfx, + .get_wptr = gfx_v9_0_ring_get_wptr_gfx, + .set_wptr = gfx_v9_0_ring_set_wptr_gfx, + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + 46 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + the first COND_EXEC jump to the place just + prior to this double SWITCH_BUFFER */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2, /* SWITCH_BUFFER */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ + .emit_ib = gfx_v9_0_ring_emit_ib_gfx, + .emit_fence = gfx_v9_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, + .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_switch_buffer = gfx_v9_ring_emit_sb, + .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, + .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, + .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { + .type = AMDGPU_RING_TYPE_COMPUTE, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_0_ring_get_rptr_compute, + .get_wptr = gfx_v9_0_ring_get_wptr_compute, + .set_wptr = gfx_v9_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v9_0_ring_emit_gds_switch */ + 7 + /* gfx_v9_0_ring_emit_hdp_flush */ + 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ + 64 + /* gfx_v9_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib = gfx_v9_0_ring_emit_ib_compute, + .emit_fence = gfx_v9_0_ring_emit_fence, + .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, + .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, + .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, + .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, + .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .support_64bit_ptrs = true, + .get_rptr = gfx_v9_0_ring_get_rptr_compute, + .get_wptr = gfx_v9_0_ring_get_wptr_compute, + .set_wptr = gfx_v9_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v9_0_ring_emit_gds_switch */ + 7 + /* gfx_v9_0_ring_emit_hdp_flush */ + 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ + 64 + /* gfx_v9_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib = gfx_v9_0_ring_emit_ib_compute, + .emit_fence = gfx_v9_0_ring_emit_fence_kiq, + .test_ring = gfx_v9_0_ring_test_ring, + .test_ib = gfx_v9_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v9_0_ring_emit_rreg, + .emit_wreg = gfx_v9_0_ring_emit_wreg, +}; + +static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) + adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; +} + +static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = { + .set = gfx_v9_0_kiq_set_interrupt_state, + .process = gfx_v9_0_kiq_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { + .set = gfx_v9_0_set_eop_interrupt_state, + .process = gfx_v9_0_eop_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = { + .set = gfx_v9_0_set_priv_reg_fault_state, + .process = gfx_v9_0_priv_reg_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = { + .set = gfx_v9_0_set_priv_inst_fault_state, + .process = gfx_v9_0_priv_inst_irq, +}; + +static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; + adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs; + + adev->gfx.priv_reg_irq.num_types = 1; + adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs; + + adev->gfx.priv_inst_irq.num_types = 1; + adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; + + adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; + adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs; +} + +static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; + break; + default: + break; + } +} + +static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) +{ + /* init asci gds info */ + adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE)); + adev->gds.gws.total_size = 64; + adev->gds.oa.total_size = 16; + + if (adev->gds.mem.total_size == 64 * 1024) { + adev->gds.mem.gfx_partition_size = 4096; + adev->gds.mem.cs_partition_size = 4096; + + adev->gds.gws.gfx_partition_size = 4; + adev->gds.gws.cs_partition_size = 4; + + adev->gds.oa.gfx_partition_size = 4; + adev->gds.oa.cs_partition_size = 1; + } else { + adev->gds.mem.gfx_partition_size = 1024; + adev->gds.mem.cs_partition_size = 1024; + + adev->gds.gws.gfx_partition_size = 16; + adev->gds.gws.cs_partition_size = 16; + + adev->gds.oa.gfx_partition_size = 4; + adev->gds.oa.cs_partition_size = 4; + } +} + +static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) +{ + u32 data, mask; + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG)); + data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG)); + + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + + mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + + return (~data) & mask; +} + +static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, + struct amdgpu_cu_info *cu_info) +{ + int i, j, k, counter, active_cu_number = 0; + u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; + + if (!adev || !cu_info) + return -EINVAL; + + memset(cu_info, 0, sizeof(*cu_info)); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + bitmap = gfx_v9_0_get_cu_active_bitmap(adev); + cu_info->bitmap[i][j] = bitmap; + + for (k = 0; k < 16; k ++) { + if (bitmap & mask) { + if (counter < 2) + ao_bitmap |= mask; + counter ++; + } + mask <<= 1; + } + active_cu_number += counter; + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + cu_info->number = active_cu_number; + cu_info->ao_cu_mask = ao_cu_mask; + + return 0; +} + +static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) +{ + int r, j; + u32 tmp; + bool use_doorbell = true; + u64 hqd_gpu_addr; + u64 mqd_gpu_addr; + u64 eop_gpu_addr; + u64 wb_gpu_addr; + u32 *buf; + struct v9_mqd *mqd; + struct amdgpu_device *adev; + + adev = ring->adev; + if (ring->mqd_obj == NULL) { + r = amdgpu_bo_create(adev, + sizeof(struct v9_mqd), + PAGE_SIZE,true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, + NULL, &ring->mqd_obj); + if (r) { + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); + return r; + } + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) { + gfx_v9_0_cp_compute_fini(adev); + return r; + } + + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, + &mqd_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); + gfx_v9_0_cp_compute_fini(adev); + return r; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); + if (r) { + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); + gfx_v9_0_cp_compute_fini(adev); + return r; + } + + /* init the mqd struct */ + memset(buf, 0, sizeof(struct v9_mqd)); + + mqd = (struct v9_mqd *)buf; + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, + ring->pipe, + ring->queue, 0); + /* disable wptr polling */ + WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); + + /* write the EOP addr */ + BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */ + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); + eop_gpu_addr >>= 8; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr)); + mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(MEC_HPD_SIZE / 4) - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp); + + /* enable doorbell? */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + if (use_doorbell) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + else + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp); + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + ring->wptr = 0; + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr_lo = 0; + mqd->cp_hqd_pq_wptr_hi = 0; + if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1)) + break; + udelay(1); + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); + } + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address wether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR), + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI), + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (use_doorbell) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER), + (AMDGPU_DOORBELL64_KIQ * 2) << 2); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER), + (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); + mqd->cp_hqd_pq_doorbell_control = tmp; + + } else { + mqd->cp_hqd_pq_doorbell_control = 0; + } + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), + mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp); + mqd->cp_hqd_persistent_state = tmp; + + /* activate the queue */ + mqd->cp_hqd_active = 1; + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + if (use_doorbell) + WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + + return 0; +} + +const struct amdgpu_ip_block_version gfx_v9_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 9, + .minor = 0, + .rev = 0, + .funcs = &gfx_v9_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h new file mode 100644 index 000000000000..56ef652a575d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFX_V9_0_H__ +#define __GFX_V9_0_H__ + +extern const struct amd_ip_funcs gfx_v9_0_ip_funcs; +extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block; + +void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); + +uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); +int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c new file mode 100644 index 000000000000..005075ff00f7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -0,0 +1,425 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "gfxhub_v1_0.h" + +#include "vega10/soc15ip.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/GC/gc_9_0_default.h" +#include "vega10/vega10_enum.h" + +#include "soc15_common.h" + +int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +{ + u32 tmp; + u64 value; + u32 i; + + /* Program MC. */ + /* Update configuration */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), + adev->mc.vram_start >> 18); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), + adev->mc.vram_end >> 18); + + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32(SOC15_REG_OFFSET(GC, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), + (u32)(value >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), + (u32)(value >> 44)); + + if (amdgpu_sriov_vf(adev)) { + /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so + vbios post doesn't program them, for SRIOV driver need to program them */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE), + adev->mc.vram_start >> 24); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP), + adev->mc.vram_end >> 24); + } + + /* Disable AGP. */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF); + + /* GART Enable. */ + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, + 3); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + MTYPE, + MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ATC_EN, + 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, + 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, + 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp); + + tmp = mmVM_L2_CNTL3_DEFAULT; + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4)); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, + 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4), tmp); + + /* setup context0 */ + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), + (u32)(adev->mc.gtt_start >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), + (u32)(adev->mc.gtt_start >> 44)); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), + (u32)(adev->mc.gtt_end >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), + (u32)(adev->mc.gtt_end >> 44)); + + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /*valid bit*/ + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), + (u32)value); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), + (u32)(value >> 32)); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), + (u32)(adev->dummy_page.addr >> 12)); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), + (u32)((u64)adev->dummy_page.addr >> 44)); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, + 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp); + + /* Disable identity aperture.*/ + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); + + for (i = 0; i <= 14; i++) { + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + + return 0; +} + +void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0); + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0); +} + +/** + * gfxhub_v1_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); +} + +static int gfxhub_v1_0_early_init(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_late_init(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + + return 0; +} + +static int gfxhub_v1_0_sw_fini(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + + 2 * i, 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + + 2 * i, 0x1f); + } + + return 0; +} + +static int gfxhub_v1_0_hw_fini(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_suspend(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_resume(void *handle) +{ + return 0; +} + +static bool gfxhub_v1_0_is_idle(void *handle) +{ + return true; +} + +static int gfxhub_v1_0_wait_for_idle(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_soft_reset(void *handle) +{ + return 0; +} + +static int gfxhub_v1_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int gfxhub_v1_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = { + .name = "gfxhub_v1_0", + .early_init = gfxhub_v1_0_early_init, + .late_init = gfxhub_v1_0_late_init, + .sw_init = gfxhub_v1_0_sw_init, + .sw_fini = gfxhub_v1_0_sw_fini, + .hw_init = gfxhub_v1_0_hw_init, + .hw_fini = gfxhub_v1_0_hw_fini, + .suspend = gfxhub_v1_0_suspend, + .resume = gfxhub_v1_0_resume, + .is_idle = gfxhub_v1_0_is_idle, + .wait_for_idle = gfxhub_v1_0_wait_for_idle, + .soft_reset = gfxhub_v1_0_soft_reset, + .set_clockgating_state = gfxhub_v1_0_set_clockgating_state, + .set_powergating_state = gfxhub_v1_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GFXHUB, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &gfxhub_v1_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h new file mode 100644 index 000000000000..5129a8ff0932 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V1_0_H__ +#define __GFXHUB_V1_0_H__ + +int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); +void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); +void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); + +extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; +extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 0635829b18cf..631aef38126d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -367,7 +367,7 @@ static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, uint32_t gpu_page_idx, uint64_t addr, - uint32_t flags) + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -379,6 +379,21 @@ static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev, return 0; } +static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -400,6 +415,60 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, WREG32(mmVM_CONTEXT1_CNTL, tmp); } + /** + + * gmc_v8_0_set_prt - set PRT VM fault + + * + + * @adev: amdgpu_device pointer + + * @enable: enable/disable VM fault handling for PRT + +*/ +static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) +{ + u32 tmp; + + if (enable && !adev->mc.prt_warning) { + dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); + adev->mc.prt_warning = true; + } + + tmp = RREG32(mmVM_PRT_CNTL); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + CB_DISABLE_FAULT_ON_UNMAPPED_ACCESS, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + TC_DISABLE_FAULT_ON_UNMAPPED_ACCESS, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L2_CACHE_STORE_INVALID_ENTRIES, + enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L1_TLB_STORE_INVALID_ENTRIES, + enable); + WREG32(mmVM_PRT_CNTL, tmp); + + if (enable) { + uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t high = adev->vm_manager.max_pfn; + + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high); + } else { + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0); + } +} + static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) { int r, i; @@ -474,7 +543,8 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_CONTEXT1_CNTL, VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK | (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) | - ((amdgpu_vm_block_size - 9) << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT)); + ((adev->vm_manager.block_size - 9) + << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT)); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) gmc_v6_0_set_fault_enable_default(adev, false); else @@ -500,6 +570,7 @@ static int gmc_v6_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = 0; return amdgpu_gart_table_vram_alloc(adev); } @@ -551,6 +622,7 @@ static int gmc_v6_0_vm_init(struct amdgpu_device *adev) * amdkfd will use VMIDs 8-15 */ adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ @@ -769,15 +841,16 @@ static int gmc_v6_0_sw_init(void *handle) int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, 147, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); if (r) return r; - adev->vm_manager.max_pfn = amdgpu_vm_size << 18; + amdgpu_vm_adjust_size(adev, 64); + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; adev->mc.mc_mask = 0xffffffffffULL; @@ -1039,7 +1112,7 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, if (printk_ratelimit()) { dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", - entry->src_id, entry->src_data); + entry->src_id, entry->src_data[0]); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", @@ -1082,6 +1155,8 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = { .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v6_0_gart_set_pte_pde, + .set_prt = gmc_v6_0_set_prt, + .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags }; static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 8d05e0c4e3d7..92abe12d92bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -37,6 +37,8 @@ #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#include "amdgpu_atombios.h" + static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); static int gmc_v7_0_wait_for_idle(void *handle); @@ -161,9 +163,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) out: if (err) { - printk(KERN_ERR - "cik_mc: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name); release_firmware(adev->mc.fw); adev->mc.fw = NULL; } @@ -327,48 +327,51 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) */ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) { - u32 tmp; - int chansize, numchan; - - /* Get VRAM informations */ - tmp = RREG32(mmMC_ARB_RAMCFG); - if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) { - chansize = 64; - } else { - chansize = 32; - } - tmp = RREG32(mmMC_SHARED_CHMAP); - switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 4; - break; - case 3: - numchan = 8; - break; - case 4: - numchan = 3; - break; - case 5: - numchan = 6; - break; - case 6: - numchan = 10; - break; - case 7: - numchan = 12; - break; - case 8: - numchan = 16; - break; + adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); + if (!adev->mc.vram_width) { + u32 tmp; + int chansize, numchan; + + /* Get VRAM informations */ + tmp = RREG32(mmMC_ARB_RAMCFG); + if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) { + chansize = 64; + } else { + chansize = 32; + } + tmp = RREG32(mmMC_SHARED_CHMAP); + switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 4; + break; + case 3: + numchan = 8; + break; + case 4: + numchan = 3; + break; + case 5: + numchan = 6; + break; + case 6: + numchan = 10; + break; + case 7: + numchan = 12; + break; + case 8: + numchan = 16; + break; + } + adev->mc.vram_width = numchan * chansize; } - adev->mc.vram_width = numchan * chansize; /* Could aper size report 0 ? */ adev->mc.aper_base = pci_resource_start(adev->pdev, 0); adev->mc.aper_size = pci_resource_len(adev->pdev, 0); @@ -441,7 +444,7 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, uint32_t gpu_page_idx, uint64_t addr, - uint32_t flags) + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -453,6 +456,21 @@ static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev, return 0; } +static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -481,6 +499,62 @@ static void gmc_v7_0_set_fault_enable_default(struct amdgpu_device *adev, } /** + * gmc_v7_0_set_prt - set PRT VM fault + * + * @adev: amdgpu_device pointer + * @enable: enable/disable VM fault handling for PRT + */ +static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) +{ + uint32_t tmp; + + if (enable && !adev->mc.prt_warning) { + dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); + adev->mc.prt_warning = true; + } + + tmp = RREG32(mmVM_PRT_CNTL); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + CB_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + CB_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + TC_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + TC_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L2_CACHE_STORE_INVALID_ENTRIES, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L1_TLB_STORE_INVALID_ENTRIES, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + MASK_PDE0_FAULT, enable); + WREG32(mmVM_PRT_CNTL, tmp); + + if (enable) { + uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t high = adev->vm_manager.max_pfn; + + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high); + } else { + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0); + } +} + +/** * gmc_v7_0_gart_enable - gart enable * * @adev: amdgpu_device pointer @@ -570,7 +644,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, - amdgpu_vm_block_size - 9); + adev->vm_manager.block_size - 9); WREG32(mmVM_CONTEXT1_CNTL, tmp); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) gmc_v7_0_set_fault_enable_default(adev, false); @@ -604,6 +678,7 @@ static int gmc_v7_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = 0; return amdgpu_gart_table_vram_alloc(adev); } @@ -672,6 +747,7 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev) * amdkfd will use VMIDs 8-15 */ adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ @@ -880,6 +956,14 @@ static int gmc_v7_0_early_init(void *handle) gmc_v7_0_set_gart_funcs(adev); gmc_v7_0_set_irq_funcs(adev); + adev->mc.shared_aperture_start = 0x2000000000000000ULL; + adev->mc.shared_aperture_end = + adev->mc.shared_aperture_start + (4ULL << 30) - 1; + adev->mc.private_aperture_start = + adev->mc.shared_aperture_end + 1; + adev->mc.private_aperture_end = + adev->mc.private_aperture_start + (4ULL << 30) - 1; + return 0; } @@ -907,11 +991,11 @@ static int gmc_v7_0_sw_init(void *handle) adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, 147, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); if (r) return r; @@ -919,7 +1003,8 @@ static int gmc_v7_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. */ - adev->vm_manager.max_pfn = amdgpu_vm_size << 18; + amdgpu_vm_adjust_size(adev, 64); + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; /* Set the internal MC address mask * This is the max address of the GPU's @@ -938,12 +1023,12 @@ static int gmc_v7_0_sw_init(void *handle) if (r) { adev->need_dma32 = true; dma_bits = 32; - printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + pr_warn("amdgpu: No suitable DMA available\n"); } r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); if (r) { pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); + pr_warn("amdgpu: No coherent DMA available\n"); } r = gmc_v7_0_init_microcode(adev); @@ -1202,7 +1287,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, if (printk_ratelimit()) { dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", - entry->src_id, entry->src_data); + entry->src_id, entry->src_data[0]); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", @@ -1259,6 +1344,8 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = { .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v7_0_gart_set_pte_pde, + .set_prt = gmc_v7_0_set_prt, + .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7669b3259f35..f2ccefc66fd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -38,6 +38,8 @@ #include "vid.h" #include "vi.h" +#include "amdgpu_atombios.h" + static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); @@ -245,9 +247,7 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) out: if (err) { - printk(KERN_ERR - "mc: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("mc: Failed to load firmware \"%s\"\n", fw_name); release_firmware(adev->mc.fw); adev->mc.fw = NULL; } @@ -255,14 +255,14 @@ out: } /** - * gmc_v8_0_mc_load_microcode - load MC ucode into the hw + * gmc_v8_0_tonga_mc_load_microcode - load tonga MC ucode into the hw * * @adev: amdgpu_device pointer * * Load the GDDR MC ucode into the hw (CIK). * Returns 0 on success, error on failure. */ -static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev) +static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev) { const struct mc_firmware_header_v1_0 *hdr; const __le32 *fw_data = NULL; @@ -270,9 +270,6 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev) u32 running; int i, ucode_size, regs_size; - if (!adev->mc.fw) - return -EINVAL; - /* Skip MC ucode loading on SR-IOV capable boards. * vbios does this for us in asic_init in that case. * Skip MC ucode loading on VF, because hypervisor will do that @@ -281,6 +278,9 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev) if (amdgpu_sriov_bios(adev)) return 0; + if (!adev->mc.fw) + return -EINVAL; + hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; amdgpu_ucode_print_mc_hdr(&hdr->header); @@ -331,6 +331,76 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev) return 0; } +static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev) +{ + const struct mc_firmware_header_v1_0 *hdr; + const __le32 *fw_data = NULL; + const __le32 *io_mc_regs = NULL; + u32 data, vbios_version; + int i, ucode_size, regs_size; + + /* Skip MC ucode loading on SR-IOV capable boards. + * vbios does this for us in asic_init in that case. + * Skip MC ucode loading on VF, because hypervisor will do that + * for this adaptor. + */ + if (amdgpu_sriov_bios(adev)) + return 0; + + WREG32(mmMC_SEQ_IO_DEBUG_INDEX, 0x9F); + data = RREG32(mmMC_SEQ_IO_DEBUG_DATA); + vbios_version = data & 0xf; + + if (vbios_version == 0) + return 0; + + if (!adev->mc.fw) + return -EINVAL; + + hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; + amdgpu_ucode_print_mc_hdr(&hdr->header); + + adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version); + regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2); + io_mc_regs = (const __le32 *) + (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes)); + ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + fw_data = (const __le32 *) + (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + data = RREG32(mmMC_SEQ_MISC0); + data &= ~(0x40); + WREG32(mmMC_SEQ_MISC0, data); + + /* load mc io regs */ + for (i = 0; i < regs_size; i++) { + WREG32(mmMC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(io_mc_regs++)); + WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(io_mc_regs++)); + } + + WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008); + WREG32(mmMC_SEQ_SUP_CNTL, 0x00000010); + + /* load the MC ucode */ + for (i = 0; i < ucode_size; i++) + WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(fw_data++)); + + /* put the engine back into the active state */ + WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008); + WREG32(mmMC_SEQ_SUP_CNTL, 0x00000004); + WREG32(mmMC_SEQ_SUP_CNTL, 0x00000001); + + /* wait for training to complete */ + for (i = 0; i < adev->usec_timeout; i++) { + data = RREG32(mmMC_SEQ_MISC0); + if (data & 0x80) + break; + udelay(1); + } + + return 0; +} + static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) { @@ -419,48 +489,51 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) */ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) { - u32 tmp; - int chansize, numchan; - - /* Get VRAM informations */ - tmp = RREG32(mmMC_ARB_RAMCFG); - if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) { - chansize = 64; - } else { - chansize = 32; - } - tmp = RREG32(mmMC_SHARED_CHMAP); - switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { - case 0: - default: - numchan = 1; - break; - case 1: - numchan = 2; - break; - case 2: - numchan = 4; - break; - case 3: - numchan = 8; - break; - case 4: - numchan = 3; - break; - case 5: - numchan = 6; - break; - case 6: - numchan = 10; - break; - case 7: - numchan = 12; - break; - case 8: - numchan = 16; - break; + adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev); + if (!adev->mc.vram_width) { + u32 tmp; + int chansize, numchan; + + /* Get VRAM informations */ + tmp = RREG32(mmMC_ARB_RAMCFG); + if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) { + chansize = 64; + } else { + chansize = 32; + } + tmp = RREG32(mmMC_SHARED_CHMAP); + switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 4; + break; + case 3: + numchan = 8; + break; + case 4: + numchan = 3; + break; + case 5: + numchan = 6; + break; + case 6: + numchan = 10; + break; + case 7: + numchan = 12; + break; + case 8: + numchan = 16; + break; + } + adev->mc.vram_width = numchan * chansize; } - adev->mc.vram_width = numchan * chansize; /* Could aper size report 0 ? */ adev->mc.aper_base = pci_resource_start(adev->pdev, 0); adev->mc.aper_size = pci_resource_len(adev->pdev, 0); @@ -533,7 +606,7 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, uint32_t gpu_page_idx, uint64_t addr, - uint32_t flags) + uint64_t flags) { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; @@ -565,6 +638,23 @@ static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev, return 0; } +static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -595,6 +685,62 @@ static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev, } /** + * gmc_v8_0_set_prt - set PRT VM fault + * + * @adev: amdgpu_device pointer + * @enable: enable/disable VM fault handling for PRT +*/ +static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) +{ + u32 tmp; + + if (enable && !adev->mc.prt_warning) { + dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n"); + adev->mc.prt_warning = true; + } + + tmp = RREG32(mmVM_PRT_CNTL); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + CB_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + CB_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + TC_DISABLE_READ_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + TC_DISABLE_WRITE_FAULT_ON_UNMAPPED_ACCESS, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L2_CACHE_STORE_INVALID_ENTRIES, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + L1_TLB_STORE_INVALID_ENTRIES, enable); + tmp = REG_SET_FIELD(tmp, VM_PRT_CNTL, + MASK_PDE0_FAULT, enable); + WREG32(mmVM_PRT_CNTL, tmp); + + if (enable) { + uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t high = adev->vm_manager.max_pfn; + + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, low); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, high); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, high); + } else { + WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE2_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE3_LOW_ADDR, 0xfffffff); + WREG32(mmVM_PRT_APERTURE0_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE1_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE2_HIGH_ADDR, 0x0); + WREG32(mmVM_PRT_APERTURE3_HIGH_ADDR, 0x0); + } +} + +/** * gmc_v8_0_gart_enable - gart enable * * @adev: amdgpu_device pointer @@ -707,7 +853,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, - amdgpu_vm_block_size - 9); + adev->vm_manager.block_size - 9); WREG32(mmVM_CONTEXT1_CNTL, tmp); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) gmc_v8_0_set_fault_enable_default(adev, false); @@ -735,6 +881,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev) if (r) return r; adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = AMDGPU_PTE_EXECUTABLE; return amdgpu_gart_table_vram_alloc(adev); } @@ -803,6 +950,7 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev) * amdkfd will use VMIDs 8-15 */ adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; amdgpu_vm_manager_init(adev); /* base offset of vram pages */ @@ -885,6 +1033,14 @@ static int gmc_v8_0_early_init(void *handle) gmc_v8_0_set_gart_funcs(adev); gmc_v8_0_set_irq_funcs(adev); + adev->mc.shared_aperture_start = 0x2000000000000000ULL; + adev->mc.shared_aperture_end = + adev->mc.shared_aperture_start + (4ULL << 30) - 1; + adev->mc.private_aperture_start = + adev->mc.shared_aperture_end + 1; + adev->mc.private_aperture_end = + adev->mc.private_aperture_start + (4ULL << 30) - 1; + return 0; } @@ -919,11 +1075,11 @@ static int gmc_v8_0_sw_init(void *handle) adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, 147, &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault); if (r) return r; @@ -931,7 +1087,8 @@ static int gmc_v8_0_sw_init(void *handle) * Currently set to 4GB ((1 << 20) 4k pages). * Max GPUVM size for cayman and SI is 40 bits. */ - adev->vm_manager.max_pfn = amdgpu_vm_size << 18; + amdgpu_vm_adjust_size(adev, 64); + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; /* Set the internal MC address mask * This is the max address of the GPU's @@ -950,12 +1107,12 @@ static int gmc_v8_0_sw_init(void *handle) if (r) { adev->need_dma32 = true; dma_bits = 32; - printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + pr_warn("amdgpu: No suitable DMA available\n"); } r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); if (r) { pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); + pr_warn("amdgpu: No coherent DMA available\n"); } r = gmc_v8_0_init_microcode(adev); @@ -1015,7 +1172,15 @@ static int gmc_v8_0_hw_init(void *handle) gmc_v8_0_mc_program(adev); if (adev->asic_type == CHIP_TONGA) { - r = gmc_v8_0_mc_load_microcode(adev); + r = gmc_v8_0_tonga_mc_load_microcode(adev); + if (r) { + DRM_ERROR("Failed to load MC firmware!\n"); + return r; + } + } else if (adev->asic_type == CHIP_POLARIS11 || + adev->asic_type == CHIP_POLARIS10 || + adev->asic_type == CHIP_POLARIS12) { + r = gmc_v8_0_polaris_mc_load_microcode(adev); if (r) { DRM_ERROR("Failed to load MC firmware!\n"); return r; @@ -1237,6 +1402,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, { u32 addr, status, mc_client; + if (amdgpu_sriov_vf(adev)) { + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", + entry->src_id, entry->src_data[0]); + dev_err(adev->dev, " Can't decode VM fault info here on SRIOV VF\n"); + return 0; + } + addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); @@ -1251,7 +1423,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, if (printk_ratelimit()) { dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", - entry->src_id, entry->src_data); + entry->src_id, entry->src_data[0]); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", @@ -1427,12 +1599,15 @@ static int gmc_v8_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: fiji_update_mc_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); fiji_update_mc_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -1451,6 +1626,9 @@ static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_MC_MGCG */ data = RREG32(mmMC_HUB_MISC_HUB_CG); if (data & MC_HUB_MISC_HUB_CG__ENABLE_MASK) @@ -1485,6 +1663,8 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = { .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v8_0_gart_set_pte_pde, + .set_prt = gmc_v8_0_set_prt, + .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c new file mode 100644 index 000000000000..3b045e0b114e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -0,0 +1,860 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include "amdgpu.h" +#include "gmc_v9_0.h" + +#include "vega10/soc15ip.h" +#include "vega10/HDP/hdp_4_0_offset.h" +#include "vega10/HDP/hdp_4_0_sh_mask.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/vega10_enum.h" + +#include "soc15_common.h" + +#include "nbio_v6_1.h" +#include "gfxhub_v1_0.h" +#include "mmhub_v1_0.h" + +#define mmDF_CS_AON0_DramBaseAddress0 0x0044 +#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX 0 +//DF_CS_AON0_DramBaseAddress0 +#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT 0x0 +#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT 0x1 +#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT 0x4 +#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT 0x8 +#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT 0xc +#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK 0x00000001L +#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK 0x00000002L +#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK 0x000000F0L +#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK 0x00000700L +#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK 0xFFFFF000L + +/* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/ +#define AMDGPU_NUM_OF_VMIDS 8 + +static const u32 golden_settings_vega10_hdp[] = +{ + 0xf64, 0x0fffffff, 0x00000000, + 0xf65, 0x0fffffff, 0x00000000, + 0xf66, 0x0fffffff, 0x00000000, + 0xf67, 0x0fffffff, 0x00000000, + 0xf68, 0x0fffffff, 0x00000000, + 0xf6a, 0x0fffffff, 0x00000000, + 0xf6b, 0x0fffffff, 0x00000000, + 0xf6c, 0x0fffffff, 0x00000000, + 0xf6d, 0x0fffffff, 0x00000000, + 0xf6e, 0x0fffffff, 0x00000000, +}; + +static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + struct amdgpu_vmhub *hub; + u32 tmp, reg, bits, i; + + bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB]; + for (i = 0; i< 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp &= ~bits; + WREG32(reg, tmp); + } + break; + case AMDGPU_IRQ_STATE_ENABLE: + /* MM HUB */ + hub = &adev->vmhub[AMDGPU_MMHUB]; + for (i = 0; i< 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + + /* GFX HUB */ + hub = &adev->vmhub[AMDGPU_GFXHUB]; + for (i = 0; i < 16; i++) { + reg = hub->vm_context0_cntl + i; + tmp = RREG32(reg); + tmp |= bits; + WREG32(reg, tmp); + } + break; + default: + break; + } + + return 0; +} + +static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[entry->vm_id_src]; + uint32_t status = 0; + u64 addr; + + addr = (u64)entry->src_data[0] << 12; + addr |= ((u64)entry->src_data[1] & 0xf) << 44; + + if (!amdgpu_sriov_vf(adev)) { + status = RREG32(hub->vm_l2_pro_fault_status); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + } + + if (printk_ratelimit()) { + dev_err(adev->dev, + "[%s] VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u)\n", + entry->vm_id_src ? "mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vm_id, + entry->pas_id); + dev_err(adev->dev, " at page 0x%016llx from %d\n", + addr, entry->client_id); + if (!amdgpu_sriov_vf(adev)) + dev_err(adev->dev, + "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = { + .set = gmc_v9_0_vm_fault_interrupt_state, + .process = gmc_v9_0_process_interrupt, +}; + +static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->mc.vm_fault.num_types = 1; + adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs; +} + +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vm_id) +{ + u32 req = 0; + + /* invalidate using legacy mode on vm_id*/ + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vm_id); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +/* + * GART + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code. + */ + +/** + * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * + * Flush the TLB for the requested page table. + */ +static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, + uint32_t vmid) +{ + /* Use register 17 for GART */ + const unsigned eng = 17; + unsigned i, j; + + /* flush hdp cache */ + nbio_v6_1_hdp_flush(adev); + + spin_lock(&adev->mc.invalidate_lock); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &adev->vmhub[i]; + u32 tmp = gmc_v9_0_get_invalidate_req(vmid); + + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* Busy wait for ACK.*/ + for (j = 0; j < 100; j++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp &= 1 << vmid; + if (tmp) + break; + cpu_relax(); + } + if (j < 100) + continue; + + /* Wait for ACK with a delay.*/ + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + tmp &= 1 << vmid; + if (tmp) + break; + udelay(1); + } + if (j < adev->usec_timeout) + continue; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + } + + spin_unlock(&adev->mc.invalidate_lock); +} + +/** + * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO + * + * @adev: amdgpu_device pointer + * @cpu_pt_addr: cpu address of the page table + * @gpu_page_idx: entry in the page table to update + * @addr: dst addr to write into pte/pde + * @flags: access flags + * + * Update the page tables using the CPU. + */ +static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev, + void *cpu_pt_addr, + uint32_t gpu_page_idx, + uint64_t addr, + uint64_t flags) +{ + void __iomem *ptr = (void *)cpu_pt_addr; + uint64_t value; + + /* + * PTE format on VEGA 10: + * 63:59 reserved + * 58:57 mtype + * 56 F + * 55 L + * 54 P + * 53 SW + * 52 T + * 50:48 reserved + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 Z + * 2 snooped + * 1 system + * 0 valid + * + * PDE format on VEGA 10: + * 63:59 block fragment size + * 58:55 reserved + * 54 P + * 53:48 reserved + * 47:6 physical base address of PD or PTE + * 5:3 reserved + * 2 C + * 1 system + * 0 valid + */ + + /* + * The following is for PTE only. GART does not have PDEs. + */ + value = addr & 0x0000FFFFFFFFF000ULL; + value |= flags; + writeq(value, ptr + (gpu_page_idx * 8)); + return 0; +} + +static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, + uint32_t flags) + +{ + uint64_t pte_flag = 0; + + if (flags & AMDGPU_VM_PAGE_EXECUTABLE) + pte_flag |= AMDGPU_PTE_EXECUTABLE; + if (flags & AMDGPU_VM_PAGE_READABLE) + pte_flag |= AMDGPU_PTE_READABLE; + if (flags & AMDGPU_VM_PAGE_WRITEABLE) + pte_flag |= AMDGPU_PTE_WRITEABLE; + + switch (flags & AMDGPU_VM_MTYPE_MASK) { + case AMDGPU_VM_MTYPE_DEFAULT: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_NC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + break; + case AMDGPU_VM_MTYPE_WC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_WC); + break; + case AMDGPU_VM_MTYPE_CC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_CC); + break; + case AMDGPU_VM_MTYPE_UC: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_UC); + break; + default: + pte_flag |= AMDGPU_PTE_MTYPE(MTYPE_NC); + break; + } + + if (flags & AMDGPU_VM_PAGE_PRT) + pte_flag |= AMDGPU_PTE_PRT; + + return pte_flag; +} + +static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +{ + return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start; +} + +static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { + .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, + .set_pte_pde = gmc_v9_0_gart_set_pte_pde, + .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, + .adjust_mc_addr = gmc_v9_0_adjust_mc_addr, + .get_invalidate_req = gmc_v9_0_get_invalidate_req, +}; + +static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) +{ + if (adev->gart.gart_funcs == NULL) + adev->gart.gart_funcs = &gmc_v9_0_gart_funcs; +} + +static int gmc_v9_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + gmc_v9_0_set_gart_funcs(adev); + gmc_v9_0_set_irq_funcs(adev); + + return 0; +} + +static int gmc_v9_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0); +} + +static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, + struct amdgpu_mc *mc) +{ + u64 base = 0; + if (!amdgpu_sriov_vf(adev)) + base = mmhub_v1_0_get_fb_location(adev); + amdgpu_vram_location(adev, &adev->mc, base); + adev->mc.gtt_base_align = 0; + amdgpu_gtt_location(adev, mc); +} + +/** + * gmc_v9_0_mc_init - initialize the memory controller driver params + * + * @adev: amdgpu_device pointer + * + * Look up the amount of vram, vram width, and decide how to place + * vram and gart within the GPU's physical address space. + * Returns 0 for success. + */ +static int gmc_v9_0_mc_init(struct amdgpu_device *adev) +{ + u32 tmp; + int chansize, numchan; + + /* hbm memory channel size */ + chansize = 128; + + tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0)); + tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; + switch (tmp) { + case 0: + default: + numchan = 1; + break; + case 1: + numchan = 2; + break; + case 2: + numchan = 0; + break; + case 3: + numchan = 4; + break; + case 4: + numchan = 0; + break; + case 5: + numchan = 8; + break; + case 6: + numchan = 0; + break; + case 7: + numchan = 16; + break; + case 8: + numchan = 2; + break; + } + adev->mc.vram_width = numchan * chansize; + + /* Could aper size report 0 ? */ + adev->mc.aper_base = pci_resource_start(adev->pdev, 0); + adev->mc.aper_size = pci_resource_len(adev->pdev, 0); + /* size in MB on si */ + adev->mc.mc_vram_size = + nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL; + adev->mc.real_vram_size = adev->mc.mc_vram_size; + adev->mc.visible_vram_size = adev->mc.aper_size; + + /* In case the PCI BAR is larger than the actual amount of vram */ + if (adev->mc.visible_vram_size > adev->mc.real_vram_size) + adev->mc.visible_vram_size = adev->mc.real_vram_size; + + /* unless the user had overridden it, set the gart + * size equal to the 1024 or vram, whichever is larger. + */ + if (amdgpu_gart_size == -1) + adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + else + adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; + + gmc_v9_0_vram_gtt_location(adev, &adev->mc); + + return 0; +} + +static int gmc_v9_0_gart_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->gart.robj) { + WARN(1, "VEGA10 PCIE GART already initialized\n"); + return 0; + } + /* Initialize common gart structure */ + r = amdgpu_gart_init(adev); + if (r) + return r; + adev->gart.table_size = adev->gart.num_gpu_pages * 8; + adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE(MTYPE_UC) | + AMDGPU_PTE_EXECUTABLE; + return amdgpu_gart_table_vram_alloc(adev); +} + +/* + * vm + * VMID 0 is the physical GPU addresses as used by the kernel. + * VMIDs 1-15 are used for userspace clients and are handled + * by the amdgpu vm/hsa code. + */ +/** + * gmc_v9_0_vm_init - vm init callback + * + * @adev: amdgpu_device pointer + * + * Inits vega10 specific vm parameters (number of VMs, base of vram for + * VMIDs 1-15) (vega10). + * Returns 0 for success. + */ +static int gmc_v9_0_vm_init(struct amdgpu_device *adev) +{ + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + + /* TODO: fix num_level for APU when updating vm size and block size */ + if (adev->flags & AMD_IS_APU) + adev->vm_manager.num_level = 1; + else + adev->vm_manager.num_level = 3; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + /*XXX This value is not zero for APU*/ + adev->vm_manager.vram_base_offset = 0; + + return 0; +} + +/** + * gmc_v9_0_vm_fini - vm fini callback + * + * @adev: amdgpu_device pointer + * + * Tear down any asic specific VM setup. + */ +static void gmc_v9_0_vm_fini(struct amdgpu_device *adev) +{ + return; +} + +static int gmc_v9_0_sw_init(void *handle) +{ + int r; + int dma_bits; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + spin_lock_init(&adev->mc.invalidate_lock); + + if (adev->flags & AMD_IS_APU) { + adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + amdgpu_vm_adjust_size(adev, 64); + } else { + /* XXX Don't know how to get VRAM type yet. */ + adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM; + /* + * To fulfill 4-level page support, + * vm size is 256TB (48bit), maximum size of Vega10, + * block size 512 (9bit) + */ + adev->vm_manager.vm_size = 1U << 18; + adev->vm_manager.block_size = 9; + DRM_INFO("vm size is %llu GB, block size is %u-bit\n", + adev->vm_manager.vm_size, + adev->vm_manager.block_size); + } + + /* This interrupt is VMC page fault.*/ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0, + &adev->mc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0, + &adev->mc.vm_fault); + + if (r) + return r; + + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18; + + /* Set the internal MC address mask + * This is the max address of the GPU's + * internal address space. + */ + adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + + /* set DMA mask + need_dma32 flags. + * PCIE - can handle 44-bits. + * IGP - can handle 44-bits + * PCI - dma32 for legacy pci gart, 44 bits on vega10 + */ + adev->need_dma32 = false; + dma_bits = adev->need_dma32 ? 32 : 44; + r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + if (r) { + adev->need_dma32 = true; + dma_bits = 32; + printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + } + r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + if (r) { + pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); + printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); + } + + r = gmc_v9_0_mc_init(adev); + if (r) + return r; + + /* Memory manager */ + r = amdgpu_bo_init(adev); + if (r) + return r; + + r = gmc_v9_0_gart_init(adev); + if (r) + return r; + + if (!adev->vm_manager.enabled) { + r = gmc_v9_0_vm_init(adev); + if (r) { + dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); + return r; + } + adev->vm_manager.enabled = true; + } + return r; +} + +/** + * gmc_v8_0_gart_fini - vm fini callback + * + * @adev: amdgpu_device pointer + * + * Tears down the driver GART/VM setup (CIK). + */ +static void gmc_v9_0_gart_fini(struct amdgpu_device *adev) +{ + amdgpu_gart_table_vram_free(adev); + amdgpu_gart_fini(adev); +} + +static int gmc_v9_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->vm_manager.enabled) { + amdgpu_vm_manager_fini(adev); + gmc_v9_0_vm_fini(adev); + adev->vm_manager.enabled = false; + } + gmc_v9_0_gart_fini(adev); + amdgpu_gem_force_release(adev); + amdgpu_bo_fini(adev); + + return 0; +} + +static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + break; + default: + break; + } +} + +/** + * gmc_v9_0_gart_enable - gart enable + * + * @adev: amdgpu_device pointer + */ +static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) +{ + int r; + bool value; + u32 tmp; + + amdgpu_program_register_sequence(adev, + golden_settings_vega10_hdp, + (const u32)ARRAY_SIZE(golden_settings_vega10_hdp)); + + if (adev->gart.robj == NULL) { + dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); + return -EINVAL; + } + r = amdgpu_gart_table_vram_pin(adev); + if (r) + return r; + + /* After HDP is initialized, flush HDP.*/ + nbio_v6_1_hdp_flush(adev); + + r = gfxhub_v1_0_gart_enable(adev); + if (r) + return r; + + r = mmhub_v1_0_gart_enable(adev); + if (r) + return r; + + tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL)); + tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL)); + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp); + + + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) + value = false; + else + value = true; + + gfxhub_v1_0_set_fault_enable_default(adev, value); + mmhub_v1_0_set_fault_enable_default(adev, value); + + gmc_v9_0_gart_flush_gpu_tlb(adev, 0); + + DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", + (unsigned)(adev->mc.gtt_size >> 20), + (unsigned long long)adev->gart.table_addr); + adev->gart.ready = true; + return 0; +} + +static int gmc_v9_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* The sequence of these two function calls matters.*/ + gmc_v9_0_init_golden_registers(adev); + + r = gmc_v9_0_gart_enable(adev); + + return r; +} + +/** + * gmc_v9_0_gart_disable - gart disable + * + * @adev: amdgpu_device pointer + * + * This disables all VM page table. + */ +static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) +{ + gfxhub_v1_0_gart_disable(adev); + mmhub_v1_0_gart_disable(adev); + amdgpu_gart_table_vram_unpin(adev); +} + +static int gmc_v9_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); + gmc_v9_0_gart_disable(adev); + + return 0; +} + +static int gmc_v9_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->vm_manager.enabled) { + gmc_v9_0_vm_fini(adev); + adev->vm_manager.enabled = false; + } + gmc_v9_0_hw_fini(adev); + + return 0; +} + +static int gmc_v9_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = gmc_v9_0_hw_init(adev); + if (r) + return r; + + if (!adev->vm_manager.enabled) { + r = gmc_v9_0_vm_init(adev); + if (r) { + dev_err(adev->dev, + "vm manager initialization failed (%d).\n", r); + return r; + } + adev->vm_manager.enabled = true; + } + + return r; +} + +static bool gmc_v9_0_is_idle(void *handle) +{ + /* MC is always ready in GMC v9.*/ + return true; +} + +static int gmc_v9_0_wait_for_idle(void *handle) +{ + /* There is no need to wait for MC idle in GMC v9.*/ + return 0; +} + +static int gmc_v9_0_soft_reset(void *handle) +{ + /* XXX for emulation.*/ + return 0; +} + +static int gmc_v9_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int gmc_v9_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs gmc_v9_0_ip_funcs = { + .name = "gmc_v9_0", + .early_init = gmc_v9_0_early_init, + .late_init = gmc_v9_0_late_init, + .sw_init = gmc_v9_0_sw_init, + .sw_fini = gmc_v9_0_sw_fini, + .hw_init = gmc_v9_0_hw_init, + .hw_fini = gmc_v9_0_hw_fini, + .suspend = gmc_v9_0_suspend, + .resume = gmc_v9_0_resume, + .is_idle = gmc_v9_0_is_idle, + .wait_for_idle = gmc_v9_0_wait_for_idle, + .soft_reset = gmc_v9_0_soft_reset, + .set_clockgating_state = gmc_v9_0_set_clockgating_state, + .set_powergating_state = gmc_v9_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version gmc_v9_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 9, + .minor = 0, + .rev = 0, + .funcs = &gmc_v9_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h new file mode 100644 index 000000000000..b030ca5ea107 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GMC_V9_0_H__ +#define __GMC_V9_0_H__ + +extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; +extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index ac21bb7bc0f3..cb622add99a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -227,8 +227,9 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; - entry->src_data = dw[1] & 0xfffffff; + entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vm_id = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index f5a343cb0010..79a52ad2c80d 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -2981,11 +2981,13 @@ static int kv_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, 230, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, + &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, 231, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, + &adev->pm.dpm.thermal.irq); if (ret) return ret; @@ -3260,6 +3262,39 @@ static int kv_check_state_equal(struct amdgpu_device *adev, return 0; } +static int kv_dpm_read_sensor(struct amdgpu_device *adev, int idx, + void *value, int *size) +{ + struct kv_power_info *pi = kv_get_pi(adev); + uint32_t sclk; + u32 pl_index = + (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX) & + TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX_MASK) >> + TARGET_AND_CURRENT_PROFILE_INDEX__CURR_SCLK_INDEX__SHIFT; + + /* size must be at least 4 bytes for all sensors */ + if (*size < 4) + return -EINVAL; + + switch (idx) { + case AMDGPU_PP_SENSOR_GFX_SCLK: + if (pl_index < SMU__NUM_SCLK_DPM_STATE) { + sclk = be32_to_cpu( + pi->graphics_level[pl_index].SclkFrequency); + *((uint32_t *)value) = sclk; + *size = 4; + return 0; + } + return -EINVAL; + case AMDGPU_PP_SENSOR_GPU_TEMP: + *((uint32_t *)value) = kv_dpm_get_temp(adev); + *size = 4; + return 0; + default: + return -EINVAL; + } +} + const struct amd_ip_funcs kv_dpm_ip_funcs = { .name = "kv_dpm", .early_init = kv_dpm_early_init, @@ -3292,6 +3327,7 @@ static const struct amdgpu_dpm_funcs kv_dpm_funcs = { .enable_bapm = &kv_dpm_enable_bapm, .get_vce_clock_state = amdgpu_get_vce_clock_state, .check_state_equal = kv_check_state_equal, + .read_sensor = &kv_dpm_read_sensor, }; static void kv_dpm_set_dpm_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c new file mode 100644 index 000000000000..62684510ddcd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -0,0 +1,582 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "mmhub_v1_0.h" + +#include "vega10/soc15ip.h" +#include "vega10/MMHUB/mmhub_1_0_offset.h" +#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" +#include "vega10/MMHUB/mmhub_1_0_default.h" +#include "vega10/ATHUB/athub_1_0_offset.h" +#include "vega10/ATHUB/athub_1_0_sh_mask.h" +#include "vega10/ATHUB/athub_1_0_default.h" +#include "vega10/vega10_enum.h" + +#include "soc15_common.h" + +u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE)); + + base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) +{ + u32 tmp; + u64 value; + uint64_t addr; + u32 i; + + /* Program MC. */ + /* Update configuration */ + DRM_INFO("%s -- in\n", __func__); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), + adev->mc.vram_start >> 18); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), + adev->mc.vram_end >> 18); + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), + (u32)(value >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), + (u32)(value >> 44)); + + if (amdgpu_sriov_vf(adev)) { + /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so + vbios post doesn't program them, for SRIOV driver need to program them */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE), + adev->mc.vram_start >> 24); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP), + adev->mc.vram_end >> 24); + } + + /* Disable AGP. */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF); + + /* GART Enable. */ + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, + 3); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 1); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, + 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + MTYPE, + MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ATC_EN, + 1); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, + 1); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, + 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2), tmp); + + tmp = mmVM_L2_CNTL3_DEFAULT; + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4)); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, + 0); + tmp = REG_SET_FIELD(tmp, + VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, + 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp); + + /* setup context0 */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), + (u32)(adev->mc.gtt_start >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), + (u32)(adev->mc.gtt_start >> 44)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), + (u32)(adev->mc.gtt_end >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), + (u32)(adev->mc.gtt_end >> 44)); + + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /* valid bit */ + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), + (u32)value); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), + (u32)(value >> 32)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), + (u32)(adev->dummy_page.addr >> 12)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), + (u32)((u64)adev->dummy_page.addr >> 44)); + + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, + 1); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); + + addr = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); + tmp = RREG32(addr); + + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL), tmp); + + tmp = RREG32(addr); + + /* Disable identity aperture.*/ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); + + for (i = 0; i <= 14; i++) { + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + + i); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + return 0; +} + +void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) +{ + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0); + + /* Setup TLB control */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, + MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, + 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + + /* Setup L2 cache */ + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0); +} + +/** + * mmhub_v1_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) +{ + u32 tmp; + tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, + VM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); +} + +static int mmhub_v1_0_early_init(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_late_init(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); + + return 0; +} + +static int mmhub_v1_0_sw_fini(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + + 2 * i, 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + + 2 * i, 0x1f); + } + + return 0; +} + +static int mmhub_v1_0_hw_fini(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_suspend(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_resume(void *handle) +{ + return 0; +} + +static bool mmhub_v1_0_is_idle(void *handle) +{ + return true; +} + +static int mmhub_v1_0_wait_for_idle(void *handle) +{ + return 0; +} + +static int mmhub_v1_0_soft_reset(void *handle) +{ + return 0; +} + +static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1, def2, data2; + + def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2)); + def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data |= ATC_L2_MISC_CG__ENABLE_MASK; + + data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + + data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } else { + data &= ~ATC_L2_MISC_CG__ENABLE_MASK; + + data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + + data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def != data) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); + + if (def1 != data1) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1); + + if (def2 != data2) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2); +} + +static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); +} + +static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); +} + +static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if(def != data) + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); +} + +static int mmhub_v1_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_VEGA10: + mmhub_v1_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + mmhub_v1_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +static void mmhub_v1_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_MC_MGCG */ + data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} + +static int mmhub_v1_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs mmhub_v1_0_ip_funcs = { + .name = "mmhub_v1_0", + .early_init = mmhub_v1_0_early_init, + .late_init = mmhub_v1_0_late_init, + .sw_init = mmhub_v1_0_sw_init, + .sw_fini = mmhub_v1_0_sw_fini, + .hw_init = mmhub_v1_0_hw_init, + .hw_fini = mmhub_v1_0_hw_fini, + .suspend = mmhub_v1_0_suspend, + .resume = mmhub_v1_0_resume, + .is_idle = mmhub_v1_0_is_idle, + .wait_for_idle = mmhub_v1_0_wait_for_idle, + .soft_reset = mmhub_v1_0_soft_reset, + .set_clockgating_state = mmhub_v1_0_set_clockgating_state, + .set_powergating_state = mmhub_v1_0_set_powergating_state, + .get_clockgating_state = mmhub_v1_0_get_clockgating_state, +}; + +const struct amdgpu_ip_block_version mmhub_v1_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_MMHUB, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &mmhub_v1_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h new file mode 100644 index 000000000000..aadedf99c028 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V1_0_H__ +#define __MMHUB_V1_0_H__ + +u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev); +int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); +void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); +void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value); + +extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; +extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h new file mode 100644 index 000000000000..5f0fc8bf16a9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v1_0.h @@ -0,0 +1,87 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V1_0_H__ +#define __MMSCH_V1_0_H__ + +#define MMSCH_VERSION_MAJOR 1 +#define MMSCH_VERSION_MINOR 0 +#define MMSCH_VERSION (MMSCH_VERSION_MAJOR << 16 | MMSCH_VERSION_MINOR) + +enum mmsch_v1_0_command_type { + MMSCH_COMMAND__DIRECT_REG_WRITE = 0, + MMSCH_COMMAND__DIRECT_REG_POLLING = 2, + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE = 3, + MMSCH_COMMAND__INDIRECT_REG_WRITE = 8, + MMSCH_COMMAND__END = 0xf +}; + +struct mmsch_v1_0_init_header { + uint32_t version; + uint32_t header_size; + uint32_t vce_init_status; + uint32_t uvd_init_status; + uint32_t vce_table_offset; + uint32_t vce_table_size; + uint32_t uvd_table_offset; + uint32_t uvd_table_size; +}; + +struct mmsch_v1_0_cmd_direct_reg_header { + uint32_t reg_offset : 28; + uint32_t command_type : 4; +}; + +struct mmsch_v1_0_cmd_indirect_reg_header { + uint32_t reg_offset : 20; + uint32_t reg_idx_space : 8; + uint32_t command_type : 4; +}; + +struct mmsch_v1_0_cmd_direct_write { + struct mmsch_v1_0_cmd_direct_reg_header cmd_header; + uint32_t reg_value; +}; + +struct mmsch_v1_0_cmd_direct_read_modify_write { + struct mmsch_v1_0_cmd_direct_reg_header cmd_header; + uint32_t write_data; + uint32_t mask_value; +}; + +struct mmsch_v1_0_cmd_direct_polling { + struct mmsch_v1_0_cmd_direct_reg_header cmd_header; + uint32_t mask_value; + uint32_t wait_value; +}; + +struct mmsch_v1_0_cmd_end { + struct mmsch_v1_0_cmd_direct_reg_header cmd_header; +}; + +struct mmsch_v1_0_cmd_indirect_write { + struct mmsch_v1_0_cmd_indirect_reg_header cmd_header; + uint32_t reg_value; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c new file mode 100644 index 000000000000..1493301b6a94 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -0,0 +1,340 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "vega10/soc15ip.h" +#include "vega10/NBIO/nbio_6_1_offset.h" +#include "vega10/NBIO/nbio_6_1_sh_mask.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "soc15.h" +#include "vega10_ih.h" +#include "soc15_common.h" +#include "mxgpu_ai.h" + +static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev) +{ + u32 reg; + int timeout = AI_MAILBOX_TIMEDOUT; + u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID); + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL)); + reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL), reg); + + /*Wait for RCV_MSG_VALID to be 0*/ + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL)); + while (reg & mask) { + if (timeout <= 0) { + pr_err("RCV_MSG_VALID is not cleared\n"); + break; + } + mdelay(1); + timeout -=1; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL)); + } +} + +static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val) +{ + u32 reg; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL)); + reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, + TRN_MSG_VALID, val ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL), + reg); +} + +static void xgpu_ai_mailbox_trans_msg(struct amdgpu_device *adev, + enum idh_request req) +{ + u32 reg; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0)); + reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0, + MSGBUF_DATA, req); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0), + reg); + + xgpu_ai_mailbox_set_valid(adev, true); +} + +static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev, + enum idh_event event) +{ + u32 reg; + u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID); + + if (event != IDH_FLR_NOTIFICATION_CMPL) { + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL)); + if (!(reg & mask)) + return -ENOENT; + } + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0)); + if (reg != event) + return -ENOENT; + + xgpu_ai_mailbox_send_ack(adev); + + return 0; +} + +static int xgpu_ai_poll_ack(struct amdgpu_device *adev) +{ + int r = 0, timeout = AI_MAILBOX_TIMEDOUT; + u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK); + u32 reg; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL)); + while (!(reg & mask)) { + if (timeout <= 0) { + pr_err("Doesn't get ack from pf.\n"); + r = -ETIME; + break; + } + msleep(1); + timeout -= 1; + + reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_CONTROL)); + } + + return r; +} + +static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) +{ + int r = 0, timeout = AI_MAILBOX_TIMEDOUT; + + r = xgpu_ai_mailbox_rcv_msg(adev, event); + while (r) { + if (timeout <= 0) { + pr_err("Doesn't get ack from pf.\n"); + r = -ETIME; + break; + } + msleep(1); + timeout -= 1; + + r = xgpu_ai_mailbox_rcv_msg(adev, event); + } + + return r; +} + + +static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + int r; + + xgpu_ai_mailbox_trans_msg(adev, req); + + /* start to poll ack */ + r = xgpu_ai_poll_ack(adev); + if (r) + return r; + + xgpu_ai_mailbox_set_valid(adev, false); + + /* start to check msg if request is idh_req_gpu_init_access */ + if (req == IDH_REQ_GPU_INIT_ACCESS || + req == IDH_REQ_GPU_FINI_ACCESS || + req == IDH_REQ_GPU_RESET_ACCESS) { + r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); + if (r) + return r; + } + + return 0; +} + +static int xgpu_ai_request_reset(struct amdgpu_device *adev) +{ + return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS); +} + +static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + + req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS; + return xgpu_ai_send_access_requests(adev, req); +} + +static int xgpu_ai_release_full_gpu_access(struct amdgpu_device *adev, + bool init) +{ + enum idh_request req; + int r = 0; + + req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS; + r = xgpu_ai_send_access_requests(adev, req); + + return r; +} + +static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("get ack intr and do nothing.\n"); + return 0; +} + +static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, ACK_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static void xgpu_ai_mailbox_flr_work(struct work_struct *work) +{ + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); + + /* wait until RCV_MSG become 3 */ + if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { + pr_err("failed to recieve FLR_CMPL\n"); + return; + } + + /* Trigger recovery due to world switch failure */ + amdgpu_sriov_gpu_reset(adev, false); +} + +static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL)); + + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, VALID_INT_EN, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); + WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp); + + return 0; +} + +static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + int r; + + /* see what event we get */ + r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); + + /* only handle FLR_NOTIFY now */ + if (!r) + schedule_work(&adev->virt.flr_work); + + return 0; +} + +static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_ack_irq_funcs = { + .set = xgpu_ai_set_mailbox_ack_irq, + .process = xgpu_ai_mailbox_ack_irq, +}; + +static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_rcv_irq_funcs = { + .set = xgpu_ai_set_mailbox_rcv_irq, + .process = xgpu_ai_mailbox_rcv_irq, +}; + +void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->virt.ack_irq.num_types = 1; + adev->virt.ack_irq.funcs = &xgpu_ai_mailbox_ack_irq_funcs; + adev->virt.rcv_irq.num_types = 1; + adev->virt.rcv_irq.funcs = &xgpu_ai_mailbox_rcv_irq_funcs; +} + +int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); + if (r) + return r; + + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + return 0; +} + +int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0); + if (r) + return r; + r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0); + if (r) { + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); + return r; + } + + INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work); + + return 0; +} + +void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev) +{ + amdgpu_irq_put(adev, &adev->virt.ack_irq, 0); + amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); +} + +const struct amdgpu_virt_ops xgpu_ai_virt_ops = { + .req_full_gpu = xgpu_ai_request_full_gpu_access, + .rel_full_gpu = xgpu_ai_release_full_gpu_access, + .reset_gpu = xgpu_ai_request_reset, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h new file mode 100644 index 000000000000..9aefc44d2c34 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -0,0 +1,52 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MXGPU_AI_H__ +#define __MXGPU_AI_H__ + +#define AI_MAILBOX_TIMEDOUT 5000 + +enum idh_request { + IDH_REQ_GPU_INIT_ACCESS = 1, + IDH_REL_GPU_INIT_ACCESS, + IDH_REQ_GPU_FINI_ACCESS, + IDH_REL_GPU_FINI_ACCESS, + IDH_REQ_GPU_RESET_ACCESS +}; + +enum idh_event { + IDH_CLR_MSG_BUF = 0, + IDH_READY_TO_ACCESS_GPU, + IDH_FLR_NOTIFICATION, + IDH_FLR_NOTIFICATION_CMPL, + IDH_EVENT_MAX +}; + +extern const struct amdgpu_virt_ops xgpu_ai_virt_ops; + +void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev); +int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev); +int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev); +void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index d2622b6f49fa..70a3dd13cb02 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -318,31 +318,46 @@ void xgpu_vi_init_golden_registers(struct amdgpu_device *adev) static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev) { u32 reg; + int timeout = VI_MAILBOX_TIMEDOUT; + u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID); - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); reg = REG_SET_FIELD(reg, MAILBOX_CONTROL, RCV_MSG_ACK, 1); - WREG32(mmMAILBOX_CONTROL, reg); + WREG32_NO_KIQ(mmMAILBOX_CONTROL, reg); + + /*Wait for RCV_MSG_VALID to be 0*/ + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); + while (reg & mask) { + if (timeout <= 0) { + pr_err("RCV_MSG_VALID is not cleared\n"); + break; + } + mdelay(1); + timeout -=1; + + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); + } } static void xgpu_vi_mailbox_set_valid(struct amdgpu_device *adev, bool val) { u32 reg; - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); reg = REG_SET_FIELD(reg, MAILBOX_CONTROL, TRN_MSG_VALID, val ? 1 : 0); - WREG32(mmMAILBOX_CONTROL, reg); + WREG32_NO_KIQ(mmMAILBOX_CONTROL, reg); } static void xgpu_vi_mailbox_trans_msg(struct amdgpu_device *adev, - enum idh_event event) + enum idh_request req) { u32 reg; - reg = RREG32(mmMAILBOX_MSGBUF_TRN_DW0); + reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0); reg = REG_SET_FIELD(reg, MAILBOX_MSGBUF_TRN_DW0, - MSGBUF_DATA, event); - WREG32(mmMAILBOX_MSGBUF_TRN_DW0, reg); + MSGBUF_DATA, req); + WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, reg); xgpu_vi_mailbox_set_valid(adev, true); } @@ -351,8 +366,13 @@ static int xgpu_vi_mailbox_rcv_msg(struct amdgpu_device *adev, enum idh_event event) { u32 reg; + u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, RCV_MSG_VALID); - reg = RREG32(mmMAILBOX_MSGBUF_RCV_DW0); + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); + if (!(reg & mask)) + return -ENOENT; + + reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0); if (reg != event) return -ENOENT; @@ -368,7 +388,7 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) u32 mask = REG_FIELD_MASK(MAILBOX_CONTROL, TRN_MSG_ACK); u32 reg; - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); while (!(reg & mask)) { if (timeout <= 0) { pr_err("Doesn't get ack from pf.\n"); @@ -378,7 +398,7 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev) msleep(1); timeout -= 1; - reg = RREG32(mmMAILBOX_CONTROL); + reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); } return r; @@ -419,7 +439,9 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev, xgpu_vi_mailbox_set_valid(adev, false); /* start to check msg if request is idh_req_gpu_init_access */ - if (request == IDH_REQ_GPU_INIT_ACCESS) { + if (request == IDH_REQ_GPU_INIT_ACCESS || + request == IDH_REQ_GPU_FINI_ACCESS || + request == IDH_REQ_GPU_RESET_ACCESS) { r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU); if (r) return r; @@ -436,20 +458,20 @@ static int xgpu_vi_request_reset(struct amdgpu_device *adev) static int xgpu_vi_request_full_gpu_access(struct amdgpu_device *adev, bool init) { - enum idh_event event; + enum idh_request req; - event = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS; - return xgpu_vi_send_access_requests(adev, event); + req = init ? IDH_REQ_GPU_INIT_ACCESS : IDH_REQ_GPU_FINI_ACCESS; + return xgpu_vi_send_access_requests(adev, req); } static int xgpu_vi_release_full_gpu_access(struct amdgpu_device *adev, bool init) { - enum idh_event event; + enum idh_request req; int r = 0; - event = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS; - r = xgpu_vi_send_access_requests(adev, event); + req = init ? IDH_REL_GPU_INIT_ACCESS : IDH_REL_GPU_FINI_ACCESS; + r = xgpu_vi_send_access_requests(adev, req); return r; } @@ -468,28 +490,28 @@ static int xgpu_vi_set_mailbox_ack_irq(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - u32 tmp = RREG32(mmMAILBOX_INT_CNTL); + u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL); tmp = REG_SET_FIELD(tmp, MAILBOX_INT_CNTL, ACK_INT_EN, (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); - WREG32(mmMAILBOX_INT_CNTL, tmp); + WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp); return 0; } static void xgpu_vi_mailbox_flr_work(struct work_struct *work) { - struct amdgpu_virt *virt = container_of(work, - struct amdgpu_virt, flr_work.work); - struct amdgpu_device *adev = container_of(virt, - struct amdgpu_device, virt); - int r = 0; + struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); + struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); - r = xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL); - if (r) - DRM_ERROR("failed to get flr cmpl msg from hypervior.\n"); + /* wait until RCV_MSG become 3 */ + if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) { + pr_err("failed to recieve FLR_CMPL\n"); + return; + } - /* TODO: need to restore gfx states */ + /* Trigger recovery due to world switch failure */ + amdgpu_sriov_gpu_reset(adev, false); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -497,11 +519,11 @@ static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - u32 tmp = RREG32(mmMAILBOX_INT_CNTL); + u32 tmp = RREG32_NO_KIQ(mmMAILBOX_INT_CNTL); tmp = REG_SET_FIELD(tmp, MAILBOX_INT_CNTL, VALID_INT_EN, (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0); - WREG32(mmMAILBOX_INT_CNTL, tmp); + WREG32_NO_KIQ(mmMAILBOX_INT_CNTL, tmp); return 0; } @@ -512,15 +534,12 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev, { int r; - adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; + /* see what event we get */ r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); - /* do nothing for other msg */ - if (r) - return 0; - /* TODO: need to save gfx states */ - schedule_delayed_work(&adev->virt.flr_work, - msecs_to_jiffies(VI_MAILBOX_RESET_TIME)); + /* only handle FLR_NOTIFY now */ + if (!r) + schedule_work(&adev->virt.flr_work); return 0; } @@ -547,11 +566,11 @@ int xgpu_vi_mailbox_add_irq_id(struct amdgpu_device *adev) { int r; - r = amdgpu_irq_add_id(adev, 135, &adev->virt.rcv_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, 138, &adev->virt.ack_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); if (r) { amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); return r; @@ -573,14 +592,13 @@ int xgpu_vi_mailbox_get_irq(struct amdgpu_device *adev) return r; } - INIT_DELAYED_WORK(&adev->virt.flr_work, xgpu_vi_mailbox_flr_work); + INIT_WORK(&adev->virt.flr_work, xgpu_vi_mailbox_flr_work); return 0; } void xgpu_vi_mailbox_put_irq(struct amdgpu_device *adev) { - cancel_delayed_work_sync(&adev->virt.flr_work); amdgpu_irq_put(adev, &adev->virt.ack_irq, 0); amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h index fd6216efd2b0..2db741131bc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h @@ -23,7 +23,7 @@ #ifndef __MXGPU_VI_H__ #define __MXGPU_VI_H__ -#define VI_MAILBOX_TIMEDOUT 150 +#define VI_MAILBOX_TIMEDOUT 5000 #define VI_MAILBOX_RESET_TIME 12 /* VI mailbox messages request */ diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c new file mode 100644 index 000000000000..97057f4a10de --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -0,0 +1,266 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v6_1.h" + +#include "vega10/soc15ip.h" +#include "vega10/NBIO/nbio_6_1_default.h" +#include "vega10/NBIO/nbio_6_1_offset.h" +#include "vega10/NBIO/nbio_6_1_sh_mask.h" +#include "vega10/vega10_enum.h" + +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 + +u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0)); + + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx) +{ + return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx); +} + +void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx, uint32_t val) +{ + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx, val); +} + +void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), + BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), 0); +} + +void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) +{ + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); +} + +u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) +{ + return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE)); +} + +static const u32 nbio_sdma_doorbell_range_reg[] = +{ + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE), + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) +}; + +void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index) +{ + u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); + + WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range); +} + +void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp; + + tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN)); + if (enable) + tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 1); + else + tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 0); + + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN), tmp); +} + +void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW), + lower_32_bits(adev->doorbell.base)); + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH), + upper_32_bits(adev->doorbell.base)); + } + + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL), tmp); +} + + +void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32(SOC15_REG_OFFSET(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE)); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); + + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_IH_DOORBELL_RANGE), ih_doorbell_range); +} + +void nbio_v6_1_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL2), adev->dummy_page.addr >> 8); + interrupt_cntl = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL)); + /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL), interrupt_cntl); +} + +void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnCPM_CONTROL); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PERM_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PERM_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_PCIE(smnCPM_CONTROL, data); +} + +void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CNTL2); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_PCIE(smnPCIE_CNTL2, data); +} + +void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_PCIE(smnCPM_CONTROL); + if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_PCIE(smnPCIE_CNTL2); + if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; +struct nbio_pcie_index_data nbio_v6_1_pcie_index_data; + +int nbio_v6_1_init(struct amdgpu_device *adev) +{ + nbio_v6_1_hdp_flush_reg.hdp_flush_req_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_REQ); + nbio_v6_1_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_GPU_HDP_FLUSH_DONE); + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK; + nbio_v6_1_hdp_flush_reg.ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK; + + nbio_v6_1_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX); + nbio_v6_1_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA); + + return 0; +} + +void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) +{ + uint32_t reg; + + reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, + mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER)); + if (reg & 1) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; + + if (reg & 0x80000000) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + + if (!reg) { + if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h new file mode 100644 index 000000000000..f6f8bc045518 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.h @@ -0,0 +1,54 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V6_1_H__ +#define __NBIO_V6_1_H__ + +#include "soc15_common.h" + +extern struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg; +extern struct nbio_pcie_index_data nbio_v6_1_pcie_index_data; +int nbio_v6_1_init(struct amdgpu_device *adev); +u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx); +void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx, uint32_t val); +void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable); +void nbio_v6_1_hdp_flush(struct amdgpu_device *adev); +u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev); +void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index); +void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable); +void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable); +void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); +void nbio_v6_1_ih_control(struct amdgpu_device *adev); +u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev); +void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable); +void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable); +void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, u32 *flags); +void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h new file mode 100644 index 000000000000..8da6da90b1c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -0,0 +1,269 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _PSP_TEE_GFX_IF_H_ +#define _PSP_TEE_GFX_IF_H_ + +#define PSP_GFX_CMD_BUF_VERSION 0x00000001 + +#define GFX_CMD_STATUS_MASK 0x0000FFFF +#define GFX_CMD_ID_MASK 0x000F0000 +#define GFX_CMD_RESERVED_MASK 0x7FF00000 +#define GFX_CMD_RESPONSE_MASK 0x80000000 + +/* TEE Gfx Command IDs for the register interface. +* Command ID must be between 0x00010000 and 0x000F0000. +*/ +enum psp_gfx_crtl_cmd_id +{ + GFX_CTRL_CMD_ID_INIT_RBI_RING = 0x00010000, /* initialize RBI ring */ + GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000, /* initialize GPCOM ring */ + GFX_CTRL_CMD_ID_DESTROY_RINGS = 0x00030000, /* destroy rings */ + GFX_CTRL_CMD_ID_CAN_INIT_RINGS = 0x00040000, /* is it allowed to initialized the rings */ + + GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ +}; + + +/* Control registers of the TEE Gfx interface. These are located in +* SRBM-to-PSP mailbox registers (total 8 registers). +*/ +struct psp_gfx_ctrl +{ + volatile uint32_t cmd_resp; /* +0 Command/Response register for Gfx commands */ + volatile uint32_t rbi_wptr; /* +4 Write pointer (index) of RBI ring */ + volatile uint32_t rbi_rptr; /* +8 Read pointer (index) of RBI ring */ + volatile uint32_t gpcom_wptr; /* +12 Write pointer (index) of GPCOM ring */ + volatile uint32_t gpcom_rptr; /* +16 Read pointer (index) of GPCOM ring */ + volatile uint32_t ring_addr_lo; /* +20 bits [31:0] of physical address of ring buffer */ + volatile uint32_t ring_addr_hi; /* +24 bits [63:32] of physical address of ring buffer */ + volatile uint32_t ring_buf_size; /* +28 Ring buffer size (in bytes) */ + +}; + + +/* Response flag is set in the command when command is completed by PSP. +* Used in the GFX_CTRL.CmdResp. +* When PSP GFX I/F is initialized, the flag is set. +*/ +#define GFX_FLAG_RESPONSE 0x80000000 + + +/* TEE Gfx Command IDs for the ring buffer interface. */ +enum psp_gfx_cmd_id +{ + GFX_CMD_ID_LOAD_TA = 0x00000001, /* load TA */ + GFX_CMD_ID_UNLOAD_TA = 0x00000002, /* unload TA */ + GFX_CMD_ID_INVOKE_CMD = 0x00000003, /* send command to TA */ + GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ + GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ + GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ + +}; + + +/* Command to load Trusted Application binary into PSP OS. */ +struct psp_gfx_cmd_load_ta +{ + uint32_t app_phy_addr_lo; /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */ + uint32_t app_phy_addr_hi; /* bits [63:32] of the physical address of the TA binary */ + uint32_t app_len; /* length of the TA binary in bytes */ + uint32_t cmd_buf_phy_addr_lo; /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_phy_addr_hi; /* bits [63:32] of the physical address of CMD buffer */ + uint32_t cmd_buf_len; /* length of the CMD buffer in bytes; must be multiple of 4 KB */ + + /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided + * for the TA. Each InvokeCommand can have dinamically mapped CMD buffer instead + * of using global persistent buffer. + */ +}; + + +/* Command to Unload Trusted Application binary from PSP OS. */ +struct psp_gfx_cmd_unload_ta +{ + uint32_t session_id; /* Session ID of the loaded TA to be unloaded */ + +}; + + +/* Shared buffers for InvokeCommand. +*/ +struct psp_gfx_buf_desc +{ + uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of the buffer */ + uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */ + +}; + +/* Max number of descriptors for one shared buffer (in how many different +* physical locations one shared buffer can be stored). If buffer is too much +* fragmented, error will be returned. +*/ +#define GFX_BUF_MAX_DESC 64 + +struct psp_gfx_buf_list +{ + uint32_t num_desc; /* number of buffer descriptors in the list */ + uint32_t total_size; /* total size of all buffers in the list in bytes (must be multiple of 4 KB) */ + struct psp_gfx_buf_desc buf_desc[GFX_BUF_MAX_DESC]; /* list of buffer descriptors */ + + /* total 776 bytes */ +}; + +/* Command to execute InvokeCommand entry point of the TA. */ +struct psp_gfx_cmd_invoke_cmd +{ + uint32_t session_id; /* Session ID of the TA to be executed */ + uint32_t ta_cmd_id; /* Command ID to be sent to TA */ + struct psp_gfx_buf_list buf; /* one indirect buffer (scatter/gather list) */ + +}; + + +/* Command to setup TMR region. */ +struct psp_gfx_cmd_setup_tmr +{ + uint32_t buf_phy_addr_lo; /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */ + uint32_t buf_phy_addr_hi; /* bits [63:32] of physical address of TMR buffer */ + uint32_t buf_size; /* buffer size in bytes (must be multiple of 4 KB) */ + +}; + + +/* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */ +enum psp_gfx_fw_type +{ + GFX_FW_TYPE_NONE = 0, + GFX_FW_TYPE_CP_ME = 1, + GFX_FW_TYPE_CP_PFP = 2, + GFX_FW_TYPE_CP_CE = 3, + GFX_FW_TYPE_CP_MEC = 4, + GFX_FW_TYPE_CP_MEC_ME1 = 5, + GFX_FW_TYPE_CP_MEC_ME2 = 6, + GFX_FW_TYPE_RLC_V = 7, + GFX_FW_TYPE_RLC_G = 8, + GFX_FW_TYPE_SDMA0 = 9, + GFX_FW_TYPE_SDMA1 = 10, + GFX_FW_TYPE_DMCU_ERAM = 11, + GFX_FW_TYPE_DMCU_ISR = 12, + GFX_FW_TYPE_VCN = 13, + GFX_FW_TYPE_UVD = 14, + GFX_FW_TYPE_VCE = 15, + GFX_FW_TYPE_ISP = 16, + GFX_FW_TYPE_ACP = 17, + GFX_FW_TYPE_SMU = 18, +}; + +/* Command to load HW IP FW. */ +struct psp_gfx_cmd_load_ip_fw +{ + uint32_t fw_phy_addr_lo; /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */ + uint32_t fw_phy_addr_hi; /* bits [63:32] of physical address of FW location */ + uint32_t fw_size; /* FW buffer size in bytes */ + enum psp_gfx_fw_type fw_type; /* FW type */ + +}; + + +/* All GFX ring buffer commands. */ +union psp_gfx_commands +{ + struct psp_gfx_cmd_load_ta cmd_load_ta; + struct psp_gfx_cmd_unload_ta cmd_unload_ta; + struct psp_gfx_cmd_invoke_cmd cmd_invoke_cmd; + struct psp_gfx_cmd_setup_tmr cmd_setup_tmr; + struct psp_gfx_cmd_load_ip_fw cmd_load_ip_fw; + +}; + + +/* Structure of GFX Response buffer. +* For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI +* it is separate buffer. +*/ +struct psp_gfx_resp +{ + uint32_t status; /* +0 status of command execution */ + uint32_t session_id; /* +4 session ID in response to LoadTa command */ + uint32_t fw_addr_lo; /* +8 bits [31:0] of FW address within TMR (in response to cmd_load_ip_fw command) */ + uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */ + + uint32_t reserved[4]; + + /* total 32 bytes */ +}; + +/* Structure of Command buffer pointed by psp_gfx_rb_frame.cmd_buf_addr_hi +* and psp_gfx_rb_frame.cmd_buf_addr_lo. +*/ +struct psp_gfx_cmd_resp +{ + uint32_t buf_size; /* +0 total size of the buffer in bytes */ + uint32_t buf_version; /* +4 version of the buffer strusture; must be PSP_GFX_CMD_BUF_VERSION */ + uint32_t cmd_id; /* +8 command ID */ + + /* These fields are used for RBI only. They are all 0 in GPCOM commands + */ + uint32_t resp_buf_addr_lo; /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */ + uint32_t resp_buf_addr_hi; /* +16 bits [63:32] of physical address of response buffer */ + uint32_t resp_offset; /* +20 offset within response buffer */ + uint32_t resp_buf_size; /* +24 total size of the response buffer in bytes */ + + union psp_gfx_commands cmd; /* +28 command specific structures */ + + uint8_t reserved_1[864 - sizeof(union psp_gfx_commands) - 28]; + + /* Note: Resp is part of this buffer for GPCOM ring. For RBI ring the response + * is separate buffer pointed by resp_buf_addr_hi and resp_buf_addr_lo. + */ + struct psp_gfx_resp resp; /* +864 response */ + + uint8_t reserved_2[1024 - 864 - sizeof(struct psp_gfx_resp)]; + + /* total size 1024 bytes */ +}; + + +#define FRAME_TYPE_DESTROY 1 /* frame sent by KMD driver when UMD Scheduler context is destroyed*/ + +/* Structure of the Ring Buffer Frame */ +struct psp_gfx_rb_frame +{ + uint32_t cmd_buf_addr_lo; /* +0 bits [31:0] of physical address of command buffer (must be 4 KB aligned) */ + uint32_t cmd_buf_addr_hi; /* +4 bits [63:32] of physical address of command buffer */ + uint32_t cmd_buf_size; /* +8 command buffer size in bytes */ + uint32_t fence_addr_lo; /* +12 bits [31:0] of physical address of Fence for this frame */ + uint32_t fence_addr_hi; /* +16 bits [63:32] of physical address of Fence for this frame */ + uint32_t fence_value; /* +20 Fence value */ + uint32_t sid_lo; /* +24 bits [31:0] of SID value (used only for RBI frames) */ + uint32_t sid_hi; /* +28 bits [63:32] of SID value (used only for RBI frames) */ + uint8_t vmid; /* +32 VMID value used for mapping of all addresses for this frame */ + uint8_t frame_type; /* +33 1: destory context frame, 0: all other frames; used only for RBI frames */ + uint8_t reserved1[2]; /* +34 reserved, must be 0 */ + uint32_t reserved2[7]; /* +40 reserved, must be 0 */ + /* total 64 bytes */ +}; + +#endif /* _PSP_TEE_GFX_IF_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c new file mode 100644 index 000000000000..c3588d1c7cb0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -0,0 +1,517 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ + +#include <linux/firmware.h> +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v3_1.h" + +#include "vega10/soc15ip.h" +#include "vega10/MP/mp_9_0_offset.h" +#include "vega10/MP/mp_9_0_sh_mask.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/SDMA0/sdma0_4_0_offset.h" +#include "vega10/NBIO/nbio_6_1_offset.h" + +MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); +MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); + +#define smnMP1_FIRMWARE_FLAGS 0x3010028 + +static int +psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) +{ + switch(ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA1: + *type = GFX_FW_TYPE_SDMA1; + break; + case AMDGPU_UCODE_ID_CP_CE: + *type = GFX_FW_TYPE_CP_CE; + break; + case AMDGPU_UCODE_ID_CP_PFP: + *type = GFX_FW_TYPE_CP_PFP; + break; + case AMDGPU_UCODE_ID_CP_ME: + *type = GFX_FW_TYPE_CP_ME; + break; + case AMDGPU_UCODE_ID_CP_MEC1: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + *type = GFX_FW_TYPE_CP_MEC_ME1; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + *type = GFX_FW_TYPE_CP_MEC_ME2; + break; + case AMDGPU_UCODE_ID_RLC_G: + *type = GFX_FW_TYPE_RLC_G; + break; + case AMDGPU_UCODE_ID_SMC: + *type = GFX_FW_TYPE_SMU; + break; + case AMDGPU_UCODE_ID_UVD: + *type = GFX_FW_TYPE_UVD; + break; + case AMDGPU_UCODE_ID_VCE: + *type = GFX_FW_TYPE_VCE; + break; + case AMDGPU_UCODE_ID_MAXIMUM: + default: + return -EINVAL; + } + + return 0; +} + +int psp_v3_1_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct psp_firmware_header_v1_0 *hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + default: BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name); + err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.sos_fw); + if (err) + goto out; + + hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; + adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version); + adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes); + adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) - + le32_to_cpu(hdr->sos_size_bytes); + adev->psp.sys_start_addr = (uint8_t *)hdr + + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(hdr->sos_offset_bytes); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); + err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.asd_fw); + if (err) + goto out; + + hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; + adev->psp.asd_fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->psp.asd_feature_version = le32_to_cpu(hdr->ucode_feature_version); + adev->psp.asd_ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes); + adev->psp.asd_start_addr = (uint8_t *)hdr + + le32_to_cpu(hdr->header.ucode_array_offset_bytes); + + return 0; +out: + if (err) { + dev_err(adev->dev, + "psp v3.1: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->psp.sos_fw); + adev->psp.sos_fw = NULL; + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; + } + + return err; +} + +int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_bo *psp_sysdrv; + void *psp_sysdrv_virt = NULL; + uint64_t psp_sysdrv_mem; + struct amdgpu_device *adev = psp->adev; + uint32_t size, sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); + if (sol_reg) + return 0; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + /* + * Create a 1 meg GART memory to store the psp sys driver + * binary with a 1 meg aligned address + */ + size = (psp->sys_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & + (~(PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); + + ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, + AMDGPU_GEM_DOMAIN_GTT, + &psp_sysdrv, + &psp_sysdrv_mem, + &psp_sysdrv_virt); + if (ret) + return ret; + + /* Copy PSP System Driver binary to memory */ + memcpy(psp_sysdrv_virt, psp->sys_start_addr, psp->sys_bin_size); + + /* Provide the sys driver to bootrom */ + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), + (uint32_t)(psp_sysdrv_mem >> 20)); + psp_gfxdrv_command_reg = 1 << 16; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + + amdgpu_bo_free_kernel(&psp_sysdrv, &psp_sysdrv_mem, &psp_sysdrv_virt); + + return ret; +} + +int psp_v3_1_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_bo *psp_sos; + void *psp_sos_virt = NULL; + uint64_t psp_sos_mem; + struct amdgpu_device *adev = psp->adev; + uint32_t size, sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)); + if (sol_reg) + return 0; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + size = (psp->sos_bin_size + (PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)) & + (~((uint64_t)PSP_BOOTLOADER_1_MEG_ALIGNMENT - 1)); + + ret = amdgpu_bo_create_kernel(adev, size, PSP_BOOTLOADER_1_MEG_ALIGNMENT, + AMDGPU_GEM_DOMAIN_GTT, + &psp_sos, + &psp_sos_mem, + &psp_sos_virt); + if (ret) + return ret; + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp_sos_virt, psp->sos_start_addr, psp->sos_bin_size); + + /* Provide the PSP secure OS to bootrom */ + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36), + (uint32_t)(psp_sos_mem >> 20)); + psp_gfxdrv_command_reg = 2 << 16; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); +#if 0 + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), + RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)), + 0, true); +#endif + + amdgpu_bo_free_kernel(&psp_sos, &psp_sos_mem, &psp_sos_virt); + + return ret; +} + +int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) +{ + int ret; + uint64_t fw_mem_mc_addr = ucode->mc_addr; + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + + cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32); + cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; + + ret = psp_v3_1_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); + if (ret) + DRM_ERROR("Unknown firmware type\n"); + + return ret; +} + +int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_70), psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_71), psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + return ret; +} + +int psp_v3_1_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + uint32_t ring_size_dw = ring->ring_size / 4; + uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + + /* KM (GPCOM) prepare write pointer */ + psp_write_ptr_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67)); + + /* Update KM RB frame pointer to new frame */ + /* write_frame ptr increments by size of rb_frame in bytes */ + /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ + if ((psp_write_ptr_reg % ring_size_dw) == 0) + write_frame = ring->ring_mem; + else + write_frame = ring->ring_mem + (psp_write_ptr_reg / rb_frame_size_dw); + + /* Initialize KM RB frame */ + memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32); + write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr); + write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32); + write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr); + write_frame->fence_value = index; + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67), psp_write_ptr_reg); + + return 0; +} + +static int +psp_v3_1_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch(ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case AMDGPU_UCODE_ID_MAXIMUM: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +bool psp_v3_1_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) +{ + int err = 0; + unsigned int fw_sram_reg_val = 0; + unsigned int fw_sram_addr_reg_offset = 0; + unsigned int fw_sram_data_reg_offset = 0; + unsigned int ucode_size; + uint32_t *ucode_mem = NULL; + struct amdgpu_device *adev = psp->adev; + + err = psp_v3_1_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset, + &fw_sram_data_reg_offset, ucode_type); + if (err) + return false; + + WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); + + ucode_size = ucode->ucode_size; + ucode_mem = (uint32_t *)ucode->kaddr; + while (ucode_size) { + fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); + + if (*ucode_mem != fw_sram_reg_val) + return false; + + ucode_mem++; + /* 4 bytes */ + ucode_size -= 4; + } + + return true; +} + +bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t reg; + + reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000; + WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg); + reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)); + return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h new file mode 100644 index 000000000000..e82eff741a08 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h @@ -0,0 +1,50 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Huang Rui + * + */ +#ifndef __PSP_V3_1_H__ +#define __PSP_V3_1_H__ + +#include "amdgpu_psp.h" + +enum { PSP_DIRECTORY_TABLE_ENTRIES = 4 }; +enum { PSP_BINARY_ALIGNMENT = 64 }; +enum { PSP_BOOTLOADER_1_MEG_ALIGNMENT = 0x100000 }; +enum { PSP_BOOTLOADER_8_MEM_ALIGNMENT = 0x800000 }; + +extern int psp_v3_1_init_microcode(struct psp_context *psp); +extern int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp); +extern int psp_v3_1_bootloader_load_sos(struct psp_context *psp); +extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd); +extern int psp_v3_1_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type); +extern int psp_v3_1_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index); +extern bool psp_v3_1_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type); +extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 896be64b7013..f2d0710258cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -158,7 +158,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) if (adev->sdma.instance[i].feature_version >= 20) adev->sdma.instance[i].burst_nop = true; - if (adev->firmware.smu_load) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -170,9 +170,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) out: if (err) { - printk(KERN_ERR - "sdma_v2_4: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("sdma_v2_4: Failed to load firmware \"%s\"\n", fw_name); for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); adev->sdma.instance[i].fw = NULL; @@ -188,7 +186,7 @@ out: * * Get the current rptr from the hardware (VI+). */ -static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->rptr_offs] >> 2; @@ -201,7 +199,7 @@ static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (VI+). */ -static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; @@ -222,7 +220,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); } static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -253,7 +251,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, u32 vmid = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -468,7 +466,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); @@ -564,7 +562,7 @@ static int sdma_v2_4_start(struct amdgpu_device *adev) int r; if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { r = sdma_v2_4_load_microcode(adev); if (r) return r; @@ -800,14 +798,14 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ ib->ptr[ib->length_dw++] = upper_32_bits(addr); ib->ptr[ib->length_dw++] = incr; /* increment size */ @@ -923,17 +921,20 @@ static int sdma_v2_4_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, + &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, + &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1208,6 +1209,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = false, .get_rptr = sdma_v2_4_ring_get_rptr, .get_wptr = sdma_v2_4_ring_get_wptr, .set_wptr = sdma_v2_4_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 011800f621c6..a69e5d4e1d2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -310,7 +310,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) if (adev->sdma.instance[i].feature_version >= 20) adev->sdma.instance[i].burst_nop = true; - if (adev->firmware.smu_load) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -321,9 +321,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) } out: if (err) { - printk(KERN_ERR - "sdma_v3_0: Failed to load firmware \"%s\"\n", - fw_name); + pr_err("sdma_v3_0: Failed to load firmware \"%s\"\n", fw_name); for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); adev->sdma.instance[i].fw = NULL; @@ -339,7 +337,7 @@ out: * * Get the current rptr from the hardware (VI+). */ -static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ return ring->adev->wb.wb[ring->rptr_offs] >> 2; @@ -352,7 +350,7 @@ static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring) * * Get the current wptr from the hardware (VI+). */ -static uint32_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 wptr; @@ -382,12 +380,12 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = ring->wptr << 2; - WDOORBELL32(ring->doorbell_index, ring->wptr << 2); + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr) << 2; + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); } else { int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); } } @@ -419,7 +417,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, u32 vmid = vm_id & 0xf; /* IB packet must end on a 8 DW boundary */ - sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -615,6 +613,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; + amdgpu_ring_clear_ring(ring); wb_offset = (ring->rptr_offs * 4); mutex_lock(&adev->srbm_mutex); @@ -661,7 +660,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); @@ -772,7 +771,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) int r, i; if (!adev->pp_enabled) { - if (!adev->firmware.smu_load) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { r = sdma_v3_0_load_microcode(adev); if (r) return r; @@ -1008,14 +1007,14 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { /* for physically contiguous pages (vram) */ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE); ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ ib->ptr[ib->length_dw++] = upper_32_bits(addr); ib->ptr[ib->length_dw++] = incr; /* increment size */ @@ -1138,17 +1137,20 @@ static int sdma_v3_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, + &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, 241, &adev->sdma.illegal_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, 247, &adev->sdma.illegal_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, + &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1512,14 +1514,17 @@ static int sdma_v3_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_CARRIZO: case CHIP_STONEY: sdma_v3_0_update_sdma_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); sdma_v3_0_update_sdma_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; default: break; @@ -1538,6 +1543,9 @@ static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_SDMA_MGCG */ data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[0]); if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK)) @@ -1574,6 +1582,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = false, .get_rptr = sdma_v3_0_ring_get_rptr, .get_wptr = sdma_v3_0_ring_get_wptr, .set_wptr = sdma_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c new file mode 100644 index 000000000000..21f38d882335 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -0,0 +1,1616 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_trace.h" + +#include "vega10/soc15ip.h" +#include "vega10/SDMA0/sdma0_4_0_offset.h" +#include "vega10/SDMA0/sdma0_4_0_sh_mask.h" +#include "vega10/SDMA1/sdma1_4_0_offset.h" +#include "vega10/SDMA1/sdma1_4_0_sh_mask.h" +#include "vega10/MMHUB/mmhub_1_0_offset.h" +#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" +#include "vega10/HDP/hdp_4_0_offset.h" + +#include "soc15_common.h" +#include "soc15.h" +#include "vega10_sdma_pkt_open.h" + +MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); +MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); + +static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); +static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev); + +static const u32 golden_settings_sdma_4[] = +{ + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xff000ff0, 0x3f000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0x003ff006, 0x0003c000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CHICKEN_BITS), 0xfe931f07, 0x02831f07, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), 0xffffffff, 0x3f000100, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), 0x003ff000, 0x0003c000, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL), 0x800f0100, 0x00000100, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL), 0x0000fff0, 0x00403000, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_UTCL1_PAGE), 0x000003ff, 0x000003c0 +}; + +static const u32 golden_settings_sdma_vg10[] = +{ + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG), 0x0018773f, 0x00104002, + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 +}; + +static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) +{ + u32 base = 0; + switch (instance) { + case 0: + base = SDMA0_BASE.instance[0].segment[0]; + break; + case 1: + base = SDMA1_BASE.instance[0].segment[0]; + break; + default: + BUG(); + break; + } + + return base + internal_offset; +} + +static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA10: + amdgpu_program_register_sequence(adev, + golden_settings_sdma_4, + (const u32)ARRAY_SIZE(golden_settings_sdma_4)); + amdgpu_program_register_sequence(adev, + golden_settings_sdma_vg10, + (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); + break; + default: + break; + } +} + +static void sdma_v4_0_print_ucode_regs(void *handle) +{ + int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + dev_info(adev->dev, "VEGA10 SDMA ucode registers\n"); + for (i = 0; i < adev->sdma.num_instances; i++) { + dev_info(adev->dev, " SDMA%d_UCODE_ADDR=0x%08X\n", + i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR))); + dev_info(adev->dev, " SDMA%d_UCODE_CHECKSUM=0x%08X\n", + i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_CHECKSUM))); + } +} + +/** + * sdma_v4_0_init_microcode - load ucode images from disk + * + * @adev: amdgpu_device pointer + * + * Use the firmware interface to load the ucode images into + * the driver (not loaded into hw). + * Returns 0 on success, error on failure. + */ + +// emulation only, won't work on real chip +// vega10 real chip need to use PSP to load firmware +static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err = 0, i; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct sdma_firmware_header_v1_0 *hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + default: BUG(); + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (i == 0) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); + if (err) + goto out; + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; + adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma.instance[i].feature_version >= 20) + adev->sdma.instance[i].burst_nop = true; + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP? "true": "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; + info->fw = adev->sdma.instance[i].fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + } +out: + if (err) { + printk(KERN_ERR + "sdma_v4_0: Failed to load firmware \"%s\"\n", + fw_name); + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } + } + return err; +} + +/** + * sdma_v4_0_ring_get_rptr - get the current read pointer + * + * @ring: amdgpu ring pointer + * + * Get the current rptr from the hardware (VEGA10+). + */ +static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + u64* rptr; + + /* XXX check if swapping is necessary on BE */ + rptr =((u64*)&ring->adev->wb.wb[ring->rptr_offs]); + + DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); + return ((*rptr) >> 2); +} + +/** + * sdma_v4_0_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (VEGA10+). + */ +static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64* wptr = NULL; + uint64_t local_wptr=0; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = ((u64*)&adev->wb.wb[ring->wptr_offs]); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); + *wptr = (*wptr) >> 2; + DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + } else { + u32 lowbit, highbit; + int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; + wptr=&local_wptr; + lowbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR)) >> 2; + highbit = RREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; + + DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", + me, highbit, lowbit); + *wptr = highbit; + *wptr = (*wptr) << 32; + *wptr |= lowbit; + } + + return *wptr; +} + +/** + * sdma_v4_0_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware (VEGA10+). + */ +static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + DRM_DEBUG("Setting write pointer\n"); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2); + adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x \n", + me, + me, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v4_0_get_reg_offset(me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } +} + +static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->funcs->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->funcs->nop); +} + +/** + * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine + * + * @ring: amdgpu ring pointer + * @ib: IB object to schedule + * + * Schedule an IB in the DMA ring (VEGA10). + */ +static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) +{ + u32 vmid = vm_id & 0xf; + + /* IB packet must end on a 8 DW boundary */ + sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | + SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0); + +} + +/** + * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring + * + * @ring: amdgpu ring pointer + * + * Emit an hdp flush packet on the requested DMA ring. + */ +static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + u32 ref_and_mask = 0; + struct nbio_hdp_flush_reg *nbio_hf_reg; + + if (ring->adev->asic_type == CHIP_VEGA10) + nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; + + if (ring == &ring->adev->sdma.instance[0].ring) + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; + else + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_done_offset << 2); + amdgpu_ring_write(ring, nbio_hf_reg->hdp_flush_req_offset << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ +} + +static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0)); + amdgpu_ring_write(ring, 1); +} + +/** + * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring + * + * @ring: amdgpu ring pointer + * @fence: amdgpu fence object + * + * Add a DMA fence packet to the ring to write + * the fence seq number and DMA trap packet to generate + * an interrupt if needed (VEGA10). + */ +static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; + /* write the fence */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + /* optionally write high bits as well */ + if (write64bit) { + addr += 4; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE)); + /* zero in first two bits */ + BUG_ON(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(seq)); + } + + /* generate an interrupt */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP)); + amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); +} + + +/** + * sdma_v4_0_gfx_stop - stop the gfx async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the gfx async dma ring buffers (VEGA10). + */ +static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + u32 rb_cntl, ib_cntl; + int i; + + if ((adev->mman.buffer_funcs_ring == sdma0) || + (adev->mman.buffer_funcs_ring == sdma1)) + amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + } + + sdma0->ready = false; + sdma1->ready = false; +} + +/** + * sdma_v4_0_rlc_stop - stop the compute async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the compute async dma queues (VEGA10). + */ +static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) +{ + /* XXX todo */ +} + +/** + * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs context switch. + * + * Halt or unhalt the async dma engines context switch (VEGA10). + */ +static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, + AUTO_CTXSW_ENABLE, enable ? 1 : 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl); + } + +} + +/** + * sdma_v4_0_enable - stop the async dma engines + * + * @adev: amdgpu_device pointer + * @enable: enable/disable the DMA MEs. + * + * Halt or unhalt the async dma engines (VEGA10). + */ +static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) +{ + u32 f32_cntl; + int i; + + if (enable == false) { + sdma_v4_0_gfx_stop(adev); + sdma_v4_0_rlc_stop(adev); + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), f32_cntl); + } +} + +/** + * sdma_v4_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them (VEGA10). + * Returns 0 for success, error for failure. + */ +static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + u32 rb_cntl, ib_cntl; + u32 rb_bufsz; + u32 wb_offset; + u32 doorbell; + u32 doorbell_offset; + u32 temp; + int i,r; + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + wb_offset = (ring->rptr_offs * 4); + + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_HI), 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), 0); + + /* set the wb address whether it's enabled or not */ + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } + + doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL)); + doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET)); + + if (ring->use_doorbell){ + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); + } + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); + nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + + if (amdgpu_sriov_vf(adev)) + sdma_v4_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); + + /* set utc l1 enable flag always to 1 */ + temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + + ring->ready = true; + + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v4_0_ctx_switch_enable(adev, true); + sdma_v4_0_enable(adev, true); + } + + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + return r; + } + + if (adev->mman.buffer_funcs_ring == ring) + amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + } + + return 0; +} + +/** + * sdma_v4_0_rlc_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the compute DMA queues and enable them (VEGA10). + * Returns 0 for success, error for failure. + */ +static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev) +{ + /* XXX todo */ + return 0; +} + +/** + * sdma_v4_0_load_microcode - load the sDMA ME ucode + * + * @adev: amdgpu_device pointer + * + * Loads the sDMA0/1 ucode. + * Returns 0 for success, -EINVAL if the ucode is not available. + */ +static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) +{ + const struct sdma_firmware_header_v1_0 *hdr; + const __le32 *fw_data; + u32 fw_size; + u32 digest_size = 0; + int i, j; + + /* halt the MEs */ + sdma_v4_0_enable(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + uint16_t version_major; + uint16_t version_minor; + if (!adev->sdma.instance[i].fw) + return -EINVAL; + + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; + amdgpu_ucode_print_sdma_hdr(&hdr->header); + fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; + + version_major = le16_to_cpu(hdr->header.header_version_major); + version_minor = le16_to_cpu(hdr->header.header_version_minor); + + if (version_major == 1 && version_minor >= 1) { + const struct sdma_firmware_header_v1_1 *sdma_v1_1_hdr = (const struct sdma_firmware_header_v1_1 *) hdr; + digest_size = le32_to_cpu(sdma_v1_1_hdr->digest_size); + } + + fw_size -= digest_size; + + fw_data = (const __le32 *) + (adev->sdma.instance[i].fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0); + + + for (j = 0; j < fw_size; j++) + { + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + } + + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + } + + sdma_v4_0_print_ucode_regs(adev); + + return 0; +} + +/** + * sdma_v4_0_start - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the DMA engines and enable them (VEGA10). + * Returns 0 for success, error for failure. + */ +static int sdma_v4_0_start(struct amdgpu_device *adev) +{ + int r = 0; + + if (amdgpu_sriov_vf(adev)) { + sdma_v4_0_ctx_switch_enable(adev, false); + sdma_v4_0_enable(adev, false); + + /* set RB registers */ + r = sdma_v4_0_gfx_resume(adev); + return r; + } + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + DRM_INFO("Loading via direct write\n"); + r = sdma_v4_0_load_microcode(adev); + if (r) + return r; + } + + /* unhalt the MEs */ + sdma_v4_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v4_0_ctx_switch_enable(adev, true); + + /* start the gfx rings and rlc compute queues */ + r = sdma_v4_0_gfx_resume(adev); + if (r) + return r; + r = sdma_v4_0_rlc_resume(adev); + if (r) + return r; + + return 0; +} + +/** + * sdma_v4_0_ring_test_ring - simple async dma engine test + * + * @ring: amdgpu_ring structure holding ring information + * + * Test the DMA engine by writing using it to write an + * value to memory. (VEGA10). + * Returns 0 for success, error for failure. + */ +static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + unsigned i; + unsigned index; + int r; + u32 tmp; + u64 gpu_addr; + + DRM_INFO("In Ring test func\n"); + + r = amdgpu_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + + r = amdgpu_ring_alloc(ring, 5); + if (r) { + DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); + amdgpu_wb_free(adev, index); + return r; + } + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); + amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); + amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) { + break; + } + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + amdgpu_wb_free(adev, index); + + return r; +} + +/** + * sdma_v4_0_ring_test_ib - test an IB on the DMA engine + * + * @ring: amdgpu_ring structure holding ring information + * + * Test a simple IB in the DMA ring (VEGA10). + * Returns 0 on success, error on failure. + */ +static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ib ib; + struct dma_fence *f = NULL; + unsigned index; + long r; + u32 tmp = 0; + u64 gpu_addr; + + r = amdgpu_wb_get(adev, &index); + if (r) { + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + return r; + } + + gpu_addr = adev->wb.gpu_addr + (index * 4); + tmp = 0xCAFEDEAD; + adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); + r = amdgpu_ib_get(adev, NULL, 256, &ib); + if (r) { + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + goto err0; + } + + ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib.ptr[1] = lower_32_bits(gpu_addr); + ib.ptr[2] = upper_32_bits(gpu_addr); + ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0); + ib.ptr[4] = 0xDEADBEEF; + ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); + ib.length_dw = 8; + + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (r) + goto err1; + + r = dma_fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; + goto err1; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto err1; + } + tmp = le32_to_cpu(adev->wb.wb[index]); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } else { + DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } +err1: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err0: + amdgpu_wb_free(adev, index); + return r; +} + + +/** + * sdma_v4_0_vm_copy_pte - update PTEs by copying them from the GART + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @src: src addr to copy from + * @count: number of page entries to update + * + * Update PTEs by copying them from the GART using sDMA (VEGA10). + */ +static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib, + uint64_t pe, uint64_t src, + unsigned count) +{ + unsigned bytes = count * 8; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = bytes - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + +} + +/** + * sdma_v4_0_vm_write_pte - update PTEs by writing them manually + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update PTEs by writing them manually using sDMA (VEGA10). + */ +static void sdma_v4_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, + uint64_t value, unsigned count, + uint32_t incr) +{ + unsigned ndw = count * 2; + + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw - 1; + for (; ndw > 0; ndw -= 2) { + ib->ptr[ib->length_dw++] = lower_32_bits(value); + ib->ptr[ib->length_dw++] = upper_32_bits(value); + value += incr; + } +} + +/** + * sdma_v4_0_vm_set_pte_pde - update the page tables using sDMA + * + * @ib: indirect buffer to fill with commands + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: access flags + * + * Update the page tables using sDMA (VEGA10). + */ +static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + /* for physically contiguous pages (vram) */ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); + ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(addr); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ +} + +/** + * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw + * + * @ib: indirect buffer to fill with padding + * + */ +static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +{ + struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); +} + + +/** + * sdma_v4_0_ring_emit_pipeline_sync - sync the pipeline + * + * @ring: amdgpu_ring pointer + * + * Make sure all previous operations are completed (CIK). + */ +static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + /* wait for idle */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ + SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); /* reference */ + amdgpu_ring_write(ring, 0xfffffff); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ +} + + +/** + * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA + * + * @ring: amdgpu_ring pointer + * @vm: amdgpu_vm pointer + * + * Update the page table base and flush the VM TLB + * using sDMA (VEGA10). + */ +static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) +{ + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + unsigned eng = ring->idx; + unsigned i; + + pd_addr = pd_addr | 0x1; /* valid bit */ + /* now only use physical base address of PDE and valid */ + BUG_ON(pd_addr & 0xFFFF00000000003EULL); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vm_id * 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vm_id * 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | + SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); + amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 1 << vm_id); /* reference */ + amdgpu_ring_write(ring, 1 << vm_id); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); + } +} + +static int sdma_v4_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->sdma.num_instances = 2; + + sdma_v4_0_set_ring_funcs(adev); + sdma_v4_0_set_buffer_funcs(adev); + sdma_v4_0_set_vm_pte_funcs(adev); + sdma_v4_0_set_irq_funcs(adev); + + return 0; +} + + +static int sdma_v4_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* SDMA trap event */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA0, 224, + &adev->sdma.trap_irq); + if (r) + return r; + + /* SDMA trap event */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA1, 224, + &adev->sdma.trap_irq); + if (r) + return r; + + r = sdma_v4_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load sdma firmware!\n"); + return r; + } + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + ring->ring_obj = NULL; + ring->use_doorbell = true; + + DRM_INFO("use_doorbell being set to: [%s]\n", + ring->use_doorbell?"true":"false"); + + ring->doorbell_index = (i == 0) ? + (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset + : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset + + sprintf(ring->name, "sdma%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + (i == 0) ? + AMDGPU_SDMA_IRQ_TRAP0 : + AMDGPU_SDMA_IRQ_TRAP1); + if (r) + return r; + } + + return r; +} + +static int sdma_v4_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + amdgpu_ring_fini(&adev->sdma.instance[i].ring); + + return 0; +} + +static int sdma_v4_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + sdma_v4_0_init_golden_registers(adev); + + r = sdma_v4_0_start(adev); + if (r) + return r; + + return r; +} + +static int sdma_v4_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + sdma_v4_0_ctx_switch_enable(adev, false); + sdma_v4_0_enable(adev, false); + + return 0; +} + +static int sdma_v4_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v4_0_hw_fini(adev); +} + +static int sdma_v4_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return sdma_v4_0_hw_init(adev); +} + +static bool sdma_v4_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 i; + for (i = 0; i < adev->sdma.num_instances; i++) { + u32 tmp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_STATUS_REG)); + if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) + return false; + } + + return true; +} + +static int sdma_v4_0_wait_for_idle(void *handle) +{ + unsigned i; + u32 sdma0,sdma1; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + for (i = 0; i < adev->usec_timeout; i++) { + sdma0 = RREG32(sdma_v4_0_get_reg_offset(0, mmSDMA0_STATUS_REG)); + sdma1 = RREG32(sdma_v4_0_get_reg_offset(1, mmSDMA0_STATUS_REG)); + + if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + return 0; + udelay(1); + } + return -ETIMEDOUT; +} + +static int sdma_v4_0_soft_reset(void *handle) +{ + /* todo */ + + return 0; +} + +static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + u32 sdma_cntl; + + u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? + sdma_v4_0_get_reg_offset(0, mmSDMA0_CNTL) : + sdma_v4_0_get_reg_offset(1, mmSDMA0_CNTL); + + sdma_cntl = RREG32(reg_offset); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(reg_offset, sdma_cntl); + + return 0; +} + +static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: SDMA trap\n"); + switch (entry->client_id) { + case AMDGPU_IH_CLIENTID_SDMA0: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[0].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + case AMDGPU_IH_CLIENTID_SDMA1: + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[1].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + /* XXX page queue*/ + break; + } + break; + } + return 0; +} + +static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("Illegal instruction in SDMA command stream\n"); + schedule_work(&adev->reset_work); + return 0; +} + + +static void sdma_v4_0_update_medium_grain_clock_gating( + struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { + /* enable sdma0 clock gating */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); + + if (adev->asic_type == CHIP_VEGA10) { + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); + data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if(def != data) + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + } + } else { + /* disable sdma0 clock gating */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); + + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); + + if (adev->asic_type == CHIP_VEGA10) { + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); + data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + } + } +} + + +static void sdma_v4_0_update_medium_grain_light_sleep( + struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { + /* 1-not override: enable sdma0 mem light sleep */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); + + /* 1-not override: enable sdma1 mem light sleep */ + if (adev->asic_type == CHIP_VEGA10) { + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); + data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + } + } else { + /* 0-override:disable sdma0 mem light sleep */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); + + /* 0-override:disable sdma1 mem light sleep */ + if (adev->asic_type == CHIP_VEGA10) { + def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); + data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (def != data) + WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + } + } +} + +static int sdma_v4_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_VEGA10: + sdma_v4_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + sdma_v4_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; +} + +static int sdma_v4_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_SDMA_MGCG */ + data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); + if (!(data & SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK)) + *flags |= AMD_CG_SUPPORT_SDMA_MGCG; + + /* AMD_CG_SUPPORT_SDMA_LS */ + data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); + if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK) + *flags |= AMD_CG_SUPPORT_SDMA_LS; +} + +const struct amd_ip_funcs sdma_v4_0_ip_funcs = { + .name = "sdma_v4_0", + .early_init = sdma_v4_0_early_init, + .late_init = NULL, + .sw_init = sdma_v4_0_sw_init, + .sw_fini = sdma_v4_0_sw_fini, + .hw_init = sdma_v4_0_hw_init, + .hw_fini = sdma_v4_0_hw_fini, + .suspend = sdma_v4_0_suspend, + .resume = sdma_v4_0_resume, + .is_idle = sdma_v4_0_is_idle, + .wait_for_idle = sdma_v4_0_wait_for_idle, + .soft_reset = sdma_v4_0_soft_reset, + .set_clockgating_state = sdma_v4_0_set_clockgating_state, + .set_powergating_state = sdma_v4_0_set_powergating_state, + .get_clockgating_state = sdma_v4_0_get_clockgating_state, +}; + +static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_ring_get_wptr, + .set_wptr = sdma_v4_0_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + 36 + /* sdma_v4_0_ring_emit_vm_flush */ + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .emit_hdp_invalidate = sdma_v4_0_ring_emit_hdp_invalidate, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, +}; + +static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; +} + +static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = { + .set = sdma_v4_0_set_trap_irq_state, + .process = sdma_v4_0_process_trap_irq, +}; + +static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = { + .process = sdma_v4_0_process_illegal_inst_irq, +}; + +static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; + adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; +} + +/** + * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_offset: src GPU address + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Copy GPU buffers using the DMA engine (VEGA10). + * Used by the amdgpu ttm implementation to move pages if + * registered as the asic copy callback. + */ +static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, + uint64_t src_offset, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count - 1; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); +} + +/** + * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine + * + * @ring: amdgpu_ring structure holding ring information + * @src_data: value to write to buffer + * @dst_offset: dst GPU address + * @byte_count: number of bytes to xfer + * + * Fill GPU buffers using the DMA engine (VEGA10). + */ +static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib, + uint32_t src_data, + uint64_t dst_offset, + uint32_t byte_count) +{ + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count - 1; +} + +static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { + .copy_max_bytes = 0x400000, + .copy_num_dw = 7, + .emit_copy_buffer = sdma_v4_0_emit_copy_buffer, + + .fill_max_bytes = 0x400000, + .fill_num_dw = 5, + .emit_fill_buffer = sdma_v4_0_emit_fill_buffer, +}; + +static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) +{ + if (adev->mman.buffer_funcs == NULL) { + adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; + } +} + +static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { + .copy_pte = sdma_v4_0_vm_copy_pte, + .write_pte = sdma_v4_0_vm_write_pte, + .set_pte_pde = sdma_v4_0_vm_set_pte_pde, +}; + +static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) +{ + unsigned i; + + if (adev->vm_manager.vm_pte_funcs == NULL) { + adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) + adev->vm_manager.vm_pte_rings[i] = + &adev->sdma.instance[i].ring; + + adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; + } +} + +const struct amdgpu_ip_block_version sdma_v4_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 4, + .minor = 0, + .rev = 0, + .funcs = &sdma_v4_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h new file mode 100644 index 000000000000..5c5a7479a062 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SDMA_V4_0_H__ +#define __SDMA_V4_0_H__ + +extern const struct amd_ip_funcs sdma_v4_0_ip_funcs; +extern const struct amdgpu_ip_block_version sdma_v4_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index b71e3faa40db..c0b1aabf282f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -45,6 +45,7 @@ #include "gmc/gmc_6_0_d.h" #include "dce/dce_6_0_d.h" #include "uvd/uvd_4_0_d.h" +#include "bif/bif_3_0_d.h" static const u32 tahiti_golden_registers[] = { @@ -1155,6 +1156,11 @@ static int si_asic_reset(struct amdgpu_device *adev) return 0; } +static u32 si_get_config_memsize(struct amdgpu_device *adev) +{ + return RREG32(mmCONFIG_MEMSIZE); +} + static void si_vga_set_state(struct amdgpu_device *adev, bool state) { uint32_t temp; @@ -1206,6 +1212,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs = .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, .set_vce_clocks = NULL, + .get_config_memsize = &si_get_config_memsize, }; static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 3372a071bb85..112969f3301a 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -37,12 +37,12 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev); static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev); static void si_dma_set_irq_funcs(struct amdgpu_device *adev); -static uint32_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs>>2]; } -static uint32_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; @@ -55,7 +55,8 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); + WREG32(DMA_RB_WPTR + sdma_offsets[me], + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -65,7 +66,7 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ - while ((ring->wptr & 7) != 5) + while ((lower_32_bits(ring->wptr) & 7) != 5) amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0)); amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); @@ -184,7 +185,7 @@ static int si_dma_start(struct amdgpu_device *adev) WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); + WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); ring->ready = true; @@ -397,7 +398,7 @@ static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint32_t flags) + uint32_t incr, uint64_t flags) { uint64_t value; unsigned ndw; @@ -416,8 +417,8 @@ static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); ib->ptr[ib->length_dw++] = pe; /* dst addr */ ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; - ib->ptr[ib->length_dw++] = flags; /* mask */ - ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ + ib->ptr[ib->length_dw++] = upper_32_bits(flags); ib->ptr[ib->length_dw++] = value; /* value */ ib->ptr[ib->length_dw++] = upper_32_bits(value); ib->ptr[ib->length_dw++] = incr; /* increment size */ @@ -516,12 +517,12 @@ static int si_dma_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* DMA0 trap event */ - r = amdgpu_irq_add_id(adev, 224, &adev->sdma.trap_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq); if (r) return r; /* DMA1 trap event */ - r = amdgpu_irq_add_id(adev, 244, &adev->sdma.trap_irq_1); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1); if (r) return r; @@ -766,6 +767,7 @@ static const struct amdgpu_ring_funcs si_dma_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, .align_mask = 0xf, .nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0), + .support_64bit_ptrs = false, .get_rptr = si_dma_ring_get_rptr, .get_wptr = si_dma_ring_get_wptr, .set_wptr = si_dma_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 33b504bafb88..7c1c5d127281 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3465,9 +3465,13 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, max_sclk = 75000; } } else if (adev->asic_type == CHIP_OLAND) { - if ((adev->pdev->device == 0x6604) && - (adev->pdev->subsystem_vendor == 0x1028) && - (adev->pdev->subsystem_device == 0x066F)) { + if ((adev->pdev->revision == 0xC7) || + (adev->pdev->revision == 0x80) || + (adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87) || + (adev->pdev->device == 0x6604) || + (adev->pdev->device == 0x6605)) { max_sclk = 75000; } } @@ -7696,11 +7700,11 @@ static int si_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, 230, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, 231, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); if (ret) return ret; @@ -7978,6 +7982,46 @@ static int si_check_state_equal(struct amdgpu_device *adev, return 0; } +static int si_dpm_read_sensor(struct amdgpu_device *adev, int idx, + void *value, int *size) +{ + struct evergreen_power_info *eg_pi = evergreen_get_pi(adev); + struct amdgpu_ps *rps = &eg_pi->current_rps; + struct si_ps *ps = si_get_ps(rps); + uint32_t sclk, mclk; + u32 pl_index = + (RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_STATE_INDEX_MASK) >> + CURRENT_STATE_INDEX_SHIFT; + + /* size must be at least 4 bytes for all sensors */ + if (*size < 4) + return -EINVAL; + + switch (idx) { + case AMDGPU_PP_SENSOR_GFX_SCLK: + if (pl_index < ps->performance_level_count) { + sclk = ps->performance_levels[pl_index].sclk; + *((uint32_t *)value) = sclk; + *size = 4; + return 0; + } + return -EINVAL; + case AMDGPU_PP_SENSOR_GFX_MCLK: + if (pl_index < ps->performance_level_count) { + mclk = ps->performance_levels[pl_index].mclk; + *((uint32_t *)value) = mclk; + *size = 4; + return 0; + } + return -EINVAL; + case AMDGPU_PP_SENSOR_GPU_TEMP: + *((uint32_t *)value) = si_dpm_get_temp(adev); + *size = 4; + return 0; + default: + return -EINVAL; + } +} const struct amd_ip_funcs si_dpm_ip_funcs = { .name = "si_dpm", @@ -8014,6 +8058,7 @@ static const struct amdgpu_dpm_funcs si_dpm_funcs = { .get_fan_speed_percent = &si_dpm_get_fan_speed_percent, .check_state_equal = &si_check_state_equal, .get_vce_clock_state = amdgpu_get_vce_clock_state, + .read_sensor = &si_dpm_read_sensor, }; static void si_dpm_set_dpm_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 81f90800ba73..e66084211c74 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -129,8 +129,9 @@ static void si_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; - entry->src_data = dw[1] & 0xfffffff; + entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vm_id = (dw[2] >> 8) & 0xff; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c new file mode 100644 index 000000000000..385de8617075 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -0,0 +1,894 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include <linux/slab.h> +#include <linux/module.h> +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "amdgpu_ih.h" +#include "amdgpu_uvd.h" +#include "amdgpu_vce.h" +#include "amdgpu_ucode.h" +#include "amdgpu_psp.h" +#include "atom.h" +#include "amd_pcie.h" + +#include "vega10/soc15ip.h" +#include "vega10/UVD/uvd_7_0_offset.h" +#include "vega10/GC/gc_9_0_offset.h" +#include "vega10/GC/gc_9_0_sh_mask.h" +#include "vega10/SDMA0/sdma0_4_0_offset.h" +#include "vega10/SDMA1/sdma1_4_0_offset.h" +#include "vega10/HDP/hdp_4_0_offset.h" +#include "vega10/HDP/hdp_4_0_sh_mask.h" +#include "vega10/MP/mp_9_0_offset.h" +#include "vega10/MP/mp_9_0_sh_mask.h" +#include "vega10/SMUIO/smuio_9_0_offset.h" +#include "vega10/SMUIO/smuio_9_0_sh_mask.h" + +#include "soc15.h" +#include "soc15_common.h" +#include "gfx_v9_0.h" +#include "gmc_v9_0.h" +#include "gfxhub_v1_0.h" +#include "mmhub_v1_0.h" +#include "vega10_ih.h" +#include "sdma_v4_0.h" +#include "uvd_v7_0.h" +#include "vce_v4_0.h" +#include "amdgpu_powerplay.h" +#include "dce_virtual.h" +#include "mxgpu_ai.h" + +MODULE_FIRMWARE("amdgpu/vega10_smc.bin"); + +#define mmFabricConfigAccessControl 0x0410 +#define mmFabricConfigAccessControl_BASE_IDX 0 +#define mmFabricConfigAccessControl_DEFAULT 0x00000000 +//FabricConfigAccessControl +#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT 0x0 +#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT 0x1 +#define FabricConfigAccessControl__CfgRegInstID__SHIFT 0x10 +#define FabricConfigAccessControl__CfgRegInstAccEn_MASK 0x00000001L +#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK 0x00000002L +#define FabricConfigAccessControl__CfgRegInstID_MASK 0x00FF0000L + + +#define mmDF_PIE_AON0_DfGlobalClkGater 0x00fc +#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX 0 +//DF_PIE_AON0_DfGlobalClkGater +#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT 0x0 +#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK 0x0000000FL + +enum { + DF_MGCG_DISABLE = 0, + DF_MGCG_ENABLE_00_CYCLE_DELAY =1, + DF_MGCG_ENABLE_01_CYCLE_DELAY =2, + DF_MGCG_ENABLE_15_CYCLE_DELAY =13, + DF_MGCG_ENABLE_31_CYCLE_DELAY =14, + DF_MGCG_ENABLE_63_CYCLE_DELAY =15 +}; + +#define mmMP0_MISC_CGTT_CTRL0 0x01b9 +#define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0 +#define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba +#define mmMP0_MISC_LIGHT_SLEEP_CTRL_BASE_IDX 0 + +/* + * Indirect registers accessor + */ +static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + struct nbio_pcie_index_data *nbio_pcie_id; + + if (adev->asic_type == CHIP_VEGA10) + nbio_pcie_id = &nbio_v6_1_pcie_index_data; + else + BUG(); + + address = nbio_pcie_id->index_offset; + data = nbio_pcie_id->data_offset; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + struct nbio_pcie_index_data *nbio_pcie_id; + + if (adev->asic_type == CHIP_VEGA10) + nbio_pcie_id = &nbio_v6_1_pcie_index_data; + else + BUG(); + + address = nbio_pcie_id->index_offset; + data = nbio_pcie_id->data_offset; + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, v); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + + address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX); + data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA); + + spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + WREG32(address, ((reg) & 0x1ff)); + r = RREG32(data); + spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); + return r; +} + +static void soc15_uvd_ctx_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_INDEX); + data = SOC15_REG_OFFSET(UVD, 0, mmUVD_CTX_DATA); + + spin_lock_irqsave(&adev->uvd_ctx_idx_lock, flags); + WREG32(address, ((reg) & 0x1ff)); + WREG32(data, (v)); + spin_unlock_irqrestore(&adev->uvd_ctx_idx_lock, flags); +} + +static u32 soc15_didt_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + + address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + r = RREG32(data); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + return r; +} + +static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, mmDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + WREG32(data, (v)); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); +} + +static u32 soc15_get_config_memsize(struct amdgpu_device *adev) +{ + return nbio_v6_1_get_memsize(adev); +} + +static const u32 vega10_golden_init[] = +{ +}; + +static void soc15_init_golden_registers(struct amdgpu_device *adev) +{ + /* Some of the registers might be dependent on GRBM_GFX_INDEX */ + mutex_lock(&adev->grbm_idx_mutex); + + switch (adev->asic_type) { + case CHIP_VEGA10: + amdgpu_program_register_sequence(adev, + vega10_golden_init, + (const u32)ARRAY_SIZE(vega10_golden_init)); + break; + default: + break; + } + mutex_unlock(&adev->grbm_idx_mutex); +} +static u32 soc15_get_xclk(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_VEGA10) + return adev->clock.spll.reference_freq/4; + else + return adev->clock.spll.reference_freq; +} + + +void soc15_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid) +{ + u32 grbm_gfx_cntl = 0; + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl); +} + +static void soc15_vga_set_state(struct amdgpu_device *adev, bool state) +{ + /* todo */ +} + +static bool soc15_read_disabled_bios(struct amdgpu_device *adev) +{ + /* todo */ + return false; +} + +static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, + u8 *bios, u32 length_bytes) +{ + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + /* set rom index to 0 */ + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + + return true; +} + +static struct amdgpu_allowed_register_entry vega10_allowed_read_registers[] = { + /* todo */ +}; + +static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = { + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS), false}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2), false}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0), false}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1), false}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2), false}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3), false}, + { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG), false}, + { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STAT), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1), false}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS), false}, + { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), false}, +}; + +static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + uint32_t val; + + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + + val = RREG32(reg_offset); + + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + return val; +} + +static uint32_t soc15_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + if (indexed) { + return soc15_read_indexed_register(adev, se_num, sh_num, reg_offset); + } else { + switch (reg_offset) { + case SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG): + return adev->gfx.config.gb_addr_config; + default: + return RREG32(reg_offset); + } + } +} + +static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset, u32 *value) +{ + struct amdgpu_allowed_register_entry *asic_register_table = NULL; + struct amdgpu_allowed_register_entry *asic_register_entry; + uint32_t size, i; + + *value = 0; + switch (adev->asic_type) { + case CHIP_VEGA10: + asic_register_table = vega10_allowed_read_registers; + size = ARRAY_SIZE(vega10_allowed_read_registers); + break; + default: + return -EINVAL; + } + + if (asic_register_table) { + for (i = 0; i < size; i++) { + asic_register_entry = asic_register_table + i; + if (reg_offset != asic_register_entry->reg_offset) + continue; + if (!asic_register_entry->untouched) + *value = soc15_get_register_value(adev, + asic_register_entry->grbm_indexed, + se_num, sh_num, reg_offset); + return 0; + } + } + + for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { + if (reg_offset != soc15_allowed_read_registers[i].reg_offset) + continue; + + if (!soc15_allowed_read_registers[i].untouched) + *value = soc15_get_register_value(adev, + soc15_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +} + +static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) +{ + u32 i; + + dev_info(adev->dev, "GPU pci config reset\n"); + + /* disable BM */ + pci_clear_master(adev->pdev); + /* reset */ + amdgpu_pci_config_reset(adev); + + udelay(100); + + /* wait for asic to come out of reset */ + for (i = 0; i < adev->usec_timeout; i++) { + if (nbio_v6_1_get_memsize(adev) != 0xffffffff) + break; + udelay(1); + } + +} + +static int soc15_asic_reset(struct amdgpu_device *adev) +{ + amdgpu_atombios_scratch_regs_engine_hung(adev, true); + + soc15_gpu_pci_config_reset(adev); + + amdgpu_atombios_scratch_regs_engine_hung(adev, false); + + return 0; +} + +/*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, + u32 cntl_reg, u32 status_reg) +{ + return 0; +}*/ + +static int soc15_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) +{ + /*int r; + + r = soc15_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS); + if (r) + return r; + + r = soc15_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); + */ + return 0; +} + +static int soc15_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) +{ + /* todo */ + + return 0; +} + +static void soc15_pcie_gen3_enable(struct amdgpu_device *adev) +{ + if (pci_is_root_bus(adev->pdev->bus)) + return; + + if (amdgpu_pcie_gen2 == 0) + return; + + if (adev->flags & AMD_IS_APU) + return; + + if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) + return; + + /* todo */ +} + +static void soc15_program_aspm(struct amdgpu_device *adev) +{ + + if (amdgpu_aspm == 0) + return; + + /* todo */ +} + +static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + nbio_v6_1_enable_doorbell_aperture(adev, enable); + nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); +} + +static const struct amdgpu_ip_block_version vega10_common_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &soc15_common_ip_funcs, +}; + +int soc15_set_ip_blocks(struct amdgpu_device *adev) +{ + nbio_v6_1_detect_hw_virt(adev); + + if (amdgpu_sriov_vf(adev)) + adev->virt.ops = &xgpu_ai_virt_ops; + + switch (adev->asic_type) { + case CHIP_VEGA10: + amdgpu_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_ip_block_add(adev, &gfxhub_v1_0_ip_block); + amdgpu_ip_block_add(adev, &mmhub_v1_0_ip_block); + amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_ip_block_add(adev, &vega10_ih_ip_block); + if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) + amdgpu_ip_block_add(adev, &psp_v3_1_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); + amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); + break; + default: + return -EINVAL; + } + + return 0; +} + +static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) +{ + return nbio_v6_1_get_rev_id(adev); +} + + +int gmc_v9_0_mc_wait_for_idle(struct amdgpu_device *adev) +{ + /* to be implemented in MC IP*/ + return 0; +} + +static const struct amdgpu_asic_funcs soc15_asic_funcs = +{ + .read_disabled_bios = &soc15_read_disabled_bios, + .read_bios_from_rom = &soc15_read_bios_from_rom, + .read_register = &soc15_read_register, + .reset = &soc15_asic_reset, + .set_vga_state = &soc15_vga_set_state, + .get_xclk = &soc15_get_xclk, + .set_uvd_clocks = &soc15_set_uvd_clocks, + .set_vce_clocks = &soc15_set_vce_clocks, + .get_config_memsize = &soc15_get_config_memsize, +}; + +static int soc15_common_early_init(void *handle) +{ + bool psp_enabled = false; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->smc_rreg = NULL; + adev->smc_wreg = NULL; + adev->pcie_rreg = &soc15_pcie_rreg; + adev->pcie_wreg = &soc15_pcie_wreg; + adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; + adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; + adev->didt_rreg = &soc15_didt_rreg; + adev->didt_wreg = &soc15_didt_wreg; + + adev->asic_funcs = &soc15_asic_funcs; + + if (amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && + (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) + psp_enabled = true; + + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev); + xgpu_ai_mailbox_set_irq_funcs(adev); + } + + /* + * nbio need be used for both sdma and gfx9, but only + * initializes once + */ + switch(adev->asic_type) { + case CHIP_VEGA10: + nbio_v6_1_init(adev); + break; + default: + return -EINVAL; + } + + adev->rev_id = soc15_get_rev_id(adev); + adev->external_rev_id = 0xFF; + switch (adev->asic_type) { + case CHIP_VEGA10: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_DRM_MGCG | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_DF_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS; + adev->pg_flags = 0; + adev->external_rev_id = 0x1; + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + + amdgpu_get_pcie_info(adev); + + return 0; +} + +static int soc15_common_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_ai_mailbox_get_irq(adev); + + return 0; +} + +static int soc15_common_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + xgpu_ai_mailbox_add_irq_id(adev); + + return 0; +} + +static int soc15_common_sw_fini(void *handle) +{ + return 0; +} + +static int soc15_common_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* move the golden regs per IP block */ + soc15_init_golden_registers(adev); + /* enable pcie gen2/3 link */ + soc15_pcie_gen3_enable(adev); + /* enable aspm */ + soc15_program_aspm(adev); + /* enable the doorbell aperture */ + soc15_enable_doorbell_aperture(adev, true); + + return 0; +} + +static int soc15_common_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* disable the doorbell aperture */ + soc15_enable_doorbell_aperture(adev, false); + if (amdgpu_sriov_vf(adev)) + xgpu_ai_mailbox_put_irq(adev); + + return 0; +} + +static int soc15_common_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return soc15_common_hw_fini(adev); +} + +static int soc15_common_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return soc15_common_hw_init(adev); +} + +static bool soc15_common_is_idle(void *handle) +{ + return true; +} + +static int soc15_common_wait_for_idle(void *handle) +{ + return 0; +} + +static int soc15_common_soft_reset(void *handle) +{ + return 0; +} + +static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK; + else + data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data); +} + +static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DRM_MGCG)) + data &= ~(0x01000000 | + 0x02000000 | + 0x04000000 | + 0x08000000 | + 0x10000000 | + 0x20000000 | + 0x40000000 | + 0x80000000); + else + data |= (0x01000000 | + 0x02000000 | + 0x04000000 | + 0x08000000 | + 0x10000000 | + 0x20000000 | + 0x40000000 | + 0x80000000); + + if (def != data) + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0), data); +} + +static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DRM_LS)) + data |= 1; + else + data &= ~1; + + if (def != data) + WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data); +} + +static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); + else + data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); +} + +static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data; + + /* Put DF on broadcast mode */ + data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl)); + data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK; + WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) { + data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); + data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + data |= DF_MGCG_ENABLE_15_CYCLE_DELAY; + WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); + } else { + data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); + data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK; + data |= DF_MGCG_DISABLE; + WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data); + } + + WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), + mmFabricConfigAccessControl_DEFAULT); +} + +static int soc15_common_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_VEGA10: + nbio_v6_1_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + nbio_v6_1_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_drm_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_drm_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_rom_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_df_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; +} + +static void soc15_common_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + nbio_v6_1_get_clockgating_state(adev, flags); + + /* AMD_CG_SUPPORT_HDP_LS */ + data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); + if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + + /* AMD_CG_SUPPORT_DRM_MGCG */ + data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); + if (!(data & 0x01000000)) + *flags |= AMD_CG_SUPPORT_DRM_MGCG; + + /* AMD_CG_SUPPORT_DRM_LS */ + data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL)); + if (data & 0x1) + *flags |= AMD_CG_SUPPORT_DRM_LS; + + /* AMD_CG_SUPPORT_ROM_MGCG */ + data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); + if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) + *flags |= AMD_CG_SUPPORT_ROM_MGCG; + + /* AMD_CG_SUPPORT_DF_MGCG */ + data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater)); + if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY) + *flags |= AMD_CG_SUPPORT_DF_MGCG; +} + +static int soc15_common_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* todo */ + return 0; +} + +const struct amd_ip_funcs soc15_common_ip_funcs = { + .name = "soc15_common", + .early_init = soc15_common_early_init, + .late_init = soc15_common_late_init, + .sw_init = soc15_common_sw_init, + .sw_fini = soc15_common_sw_fini, + .hw_init = soc15_common_hw_init, + .hw_fini = soc15_common_hw_fini, + .suspend = soc15_common_suspend, + .resume = soc15_common_resume, + .is_idle = soc15_common_is_idle, + .wait_for_idle = soc15_common_wait_for_idle, + .soft_reset = soc15_common_soft_reset, + .set_clockgating_state = soc15_common_set_clockgating_state, + .set_powergating_state = soc15_common_set_powergating_state, + .get_clockgating_state= soc15_common_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h new file mode 100644 index 000000000000..378a46da585a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -0,0 +1,35 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SOC15_H__ +#define __SOC15_H__ + +#include "nbio_v6_1.h" + +extern const struct amd_ip_funcs soc15_common_ip_funcs; + +void soc15_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid); +int soc15_set_ip_blocks(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h new file mode 100644 index 000000000000..2b96c806baa1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -0,0 +1,57 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __SOC15_COMMON_H__ +#define __SOC15_COMMON_H__ + +struct nbio_hdp_flush_reg { + u32 hdp_flush_req_offset; + u32 hdp_flush_done_offset; + u32 ref_and_mask_cp0; + u32 ref_and_mask_cp1; + u32 ref_and_mask_cp2; + u32 ref_and_mask_cp3; + u32 ref_and_mask_cp4; + u32 ref_and_mask_cp5; + u32 ref_and_mask_cp6; + u32 ref_and_mask_cp7; + u32 ref_and_mask_cp8; + u32 ref_and_mask_cp9; + u32 ref_and_mask_sdma0; + u32 ref_and_mask_sdma1; +}; + +struct nbio_pcie_index_data { + u32 index_offset; + u32 data_offset; +}; +// Register Access Macro +#define SOC15_REG_OFFSET(ip, inst, reg) (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))) + +#endif + + diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h new file mode 100644 index 000000000000..75403c7c8c9e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -0,0 +1,288 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef SOC15_H +#define SOC15_H + +#define GFX9_NUM_GFX_RINGS 1 +#define GFX9_NUM_COMPUTE_RINGS 8 + +/* + * PM4 + */ +#define PACKET_TYPE0 0 +#define PACKET_TYPE1 1 +#define PACKET_TYPE2 2 +#define PACKET_TYPE3 3 + +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3) +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF) +#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF) +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF) +#define PACKET0(reg, n) ((PACKET_TYPE0 << 30) | \ + ((reg) & 0xFFFF) | \ + ((n) & 0x3FFF) << 16) +#define CP_PACKET2 0x80000000 +#define PACKET2_PAD_SHIFT 0 +#define PACKET2_PAD_MASK (0x3fffffff << 0) + +#define PACKET2(v) (CP_PACKET2 | REG_SET(PACKET2_PAD, (v))) + +#define PACKET3(op, n) ((PACKET_TYPE3 << 30) | \ + (((op) & 0xFF) << 8) | \ + ((n) & 0x3FFF) << 16) + +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) + +/* Packet 3 types */ +#define PACKET3_NOP 0x10 +#define PACKET3_SET_BASE 0x11 +#define PACKET3_BASE_INDEX(x) ((x) << 0) +#define CE_PARTITION_BASE 3 +#define PACKET3_CLEAR_STATE 0x12 +#define PACKET3_INDEX_BUFFER_SIZE 0x13 +#define PACKET3_DISPATCH_DIRECT 0x15 +#define PACKET3_DISPATCH_INDIRECT 0x16 +#define PACKET3_ATOMIC_GDS 0x1D +#define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_OCCLUSION_QUERY 0x1F +#define PACKET3_SET_PREDICATION 0x20 +#define PACKET3_REG_RMW 0x21 +#define PACKET3_COND_EXEC 0x22 +#define PACKET3_PRED_EXEC 0x23 +#define PACKET3_DRAW_INDIRECT 0x24 +#define PACKET3_DRAW_INDEX_INDIRECT 0x25 +#define PACKET3_INDEX_BASE 0x26 +#define PACKET3_DRAW_INDEX_2 0x27 +#define PACKET3_CONTEXT_CONTROL 0x28 +#define PACKET3_INDEX_TYPE 0x2A +#define PACKET3_DRAW_INDIRECT_MULTI 0x2C +#define PACKET3_DRAW_INDEX_AUTO 0x2D +#define PACKET3_NUM_INSTANCES 0x2F +#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30 +#define PACKET3_INDIRECT_BUFFER_CONST 0x33 +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34 +#define PACKET3_DRAW_INDEX_OFFSET_2 0x35 +#define PACKET3_DRAW_PREAMBLE 0x36 +#define PACKET3_WRITE_DATA 0x37 +#define WRITE_DATA_DST_SEL(x) ((x) << 8) + /* 0 - register + * 1 - memory (sync - via GRBM) + * 2 - gl2 + * 3 - gds + * 4 - reserved + * 5 - memory (async - direct) + */ +#define WR_ONE_ADDR (1 << 16) +#define WR_CONFIRM (1 << 20) +#define WRITE_DATA_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30) + /* 0 - me + * 1 - pfp + * 2 - ce + */ +#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 +#define PACKET3_MEM_SEMAPHORE 0x39 +# define PACKET3_SEM_USE_MAILBOX (0x1 << 16) +# define PACKET3_SEM_SEL_SIGNAL_TYPE (0x1 << 20) /* 0 = increment, 1 = write 1 */ +# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) +# define PACKET3_SEM_SEL_WAIT (0x7 << 29) +#define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_OPERATION(x) ((x) << 6) + /* 0 - wait_reg_mem + * 1 - wr_wait_wr_reg + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ +#define PACKET3_INDIRECT_BUFFER 0x3F +#define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) + /* 0 - LRU + * 1 - Stream + * 2 - Bypass + */ +#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) +#define PACKET3_COPY_DATA 0x40 +#define PACKET3_PFP_SYNC_ME 0x42 +#define PACKET3_COND_WRITE 0x45 +#define PACKET3_EVENT_WRITE 0x46 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) + /* 0 - any non-TS event + * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_* + * 2 - SAMPLE_PIPELINESTAT + * 3 - SAMPLE_STREAMOUTSTAT* + * 4 - *S_PARTIAL_FLUSH + */ +#define PACKET3_RELEASE_MEM 0x49 +#define EVENT_TYPE(x) ((x) << 0) +#define EVENT_INDEX(x) ((x) << 8) +#define EOP_TCL1_VOL_ACTION_EN (1 << 12) +#define EOP_TC_VOL_ACTION_EN (1 << 13) /* L2 */ +#define EOP_TC_WB_ACTION_EN (1 << 15) /* L2 */ +#define EOP_TCL1_ACTION_EN (1 << 16) +#define EOP_TC_ACTION_EN (1 << 17) /* L2 */ +#define EOP_TC_MD_ACTION_EN (1 << 21) /* L2 metadata */ + +#define DATA_SEL(x) ((x) << 29) + /* 0 - discard + * 1 - send low 32bit data + * 2 - send 64bit data + * 3 - send 64bit GPU counter value + * 4 - send 64bit sys counter value + */ +#define INT_SEL(x) ((x) << 24) + /* 0 - none + * 1 - interrupt only (DATA_SEL = 0) + * 2 - interrupt when data write is confirmed + */ +#define DST_SEL(x) ((x) << 16) + /* 0 - MC + * 1 - TC/L2 + */ + + + +#define PACKET3_PREAMBLE_CNTL 0x4A +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28) +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28) +#define PACKET3_DMA_DATA 0x50 +/* 1. header + * 2. CONTROL + * 3. SRC_ADDR_LO or DATA [31:0] + * 4. SRC_ADDR_HI [31:0] + * 5. DST_ADDR_LO [31:0] + * 6. DST_ADDR_HI [7:0] + * 7. COMMAND [30:21] | BYTE_COUNT [20:0] + */ +/* CONTROL */ +# define PACKET3_DMA_DATA_ENGINE(x) ((x) << 0) + /* 0 - ME + * 1 - PFP + */ +# define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_DST_SEL(x) ((x) << 20) + /* 0 - DST_ADDR using DAS + * 1 - GDS + * 3 - DST_ADDR using L2 + */ +# define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25) + /* 0 - LRU + * 1 - Stream + */ +# define PACKET3_DMA_DATA_SRC_SEL(x) ((x) << 29) + /* 0 - SRC_ADDR using SAS + * 1 - GDS + * 2 - DATA + * 3 - SRC_ADDR using L2 + */ +# define PACKET3_DMA_DATA_CP_SYNC (1 << 31) +/* COMMAND */ +# define PACKET3_DMA_DATA_CMD_SAS (1 << 26) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_DAS (1 << 27) + /* 0 - memory + * 1 - register + */ +# define PACKET3_DMA_DATA_CMD_SAIC (1 << 28) +# define PACKET3_DMA_DATA_CMD_DAIC (1 << 29) +# define PACKET3_DMA_DATA_CMD_RAW_WAIT (1 << 30) +#define PACKET3_AQUIRE_MEM 0x58 +#define PACKET3_REWIND 0x59 +#define PACKET3_LOAD_UCONFIG_REG 0x5E +#define PACKET3_LOAD_SH_REG 0x5F +#define PACKET3_LOAD_CONFIG_REG 0x60 +#define PACKET3_LOAD_CONTEXT_REG 0x61 +#define PACKET3_SET_CONFIG_REG 0x68 +#define PACKET3_SET_CONFIG_REG_START 0x00002000 +#define PACKET3_SET_CONFIG_REG_END 0x00002c00 +#define PACKET3_SET_CONTEXT_REG 0x69 +#define PACKET3_SET_CONTEXT_REG_START 0x0000a000 +#define PACKET3_SET_CONTEXT_REG_END 0x0000a400 +#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 +#define PACKET3_SET_SH_REG 0x76 +#define PACKET3_SET_SH_REG_START 0x00002c00 +#define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG_OFFSET 0x77 +#define PACKET3_SET_QUEUE_REG 0x78 +#define PACKET3_SET_UCONFIG_REG 0x79 +#define PACKET3_SET_UCONFIG_REG_START 0x0000c000 +#define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SCRATCH_RAM_WRITE 0x7D +#define PACKET3_SCRATCH_RAM_READ 0x7E +#define PACKET3_LOAD_CONST_RAM 0x80 +#define PACKET3_WRITE_CONST_RAM 0x81 +#define PACKET3_DUMP_CONST_RAM 0x83 +#define PACKET3_INCREMENT_CE_COUNTER 0x84 +#define PACKET3_INCREMENT_DE_COUNTER 0x85 +#define PACKET3_WAIT_ON_CE_COUNTER 0x86 +#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 +#define PACKET3_SWITCH_BUFFER 0x8B +#define PACKET3_SET_RESOURCES 0xA0 +#define PACKET3_MAP_QUEUES 0xA2 + +#define VCE_CMD_NO_OP 0x00000000 +#define VCE_CMD_END 0x00000001 +#define VCE_CMD_IB 0x00000002 +#define VCE_CMD_FENCE 0x00000003 +#define VCE_CMD_TRAP 0x00000004 +#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_SEMAPHORE 0x00000006 + +#define VCE_CMD_IB_VM 0x00000102 +#define VCE_CMD_WAIT_GE 0x00000106 +#define VCE_CMD_UPDATE_PTB 0x00000107 +#define VCE_CMD_FLUSH_TLB 0x00000108 +#define VCE_CMD_REG_WRITE 0x00000109 +#define VCE_CMD_REG_WAIT 0x0000010a + +#define HEVC_ENC_CMD_NO_OP 0x00000000 +#define HEVC_ENC_CMD_END 0x00000001 +#define HEVC_ENC_CMD_FENCE 0x00000003 +#define HEVC_ENC_CMD_TRAP 0x00000004 +#define HEVC_ENC_CMD_IB_VM 0x00000102 +#define HEVC_ENC_CMD_REG_WRITE 0x00000109 +#define HEVC_ENC_CMD_REG_WAIT 0x0000010a + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 52b71ee58793..3a5097ac2bb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -238,8 +238,9 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; - entry->src_data = dw[1] & 0xfffffff; + entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vm_id = (dw[2] >> 8) & 0xff; entry->pas_id = (dw[2] >> 16) & 0xffff; @@ -288,7 +289,7 @@ static int tonga_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 4 * 1024, true); + r = amdgpu_ih_ring_init(adev, 64 * 1024, true); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index b34cefc7ebd5..8ab0f78794a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -55,7 +55,7 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -69,7 +69,7 @@ static uint32_t uvd_v4_2_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v4_2_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -87,7 +87,7 @@ static void uvd_v4_2_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v4_2_early_init(void *handle) @@ -107,7 +107,7 @@ static int uvd_v4_2_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); if (r) return r; @@ -135,12 +135,9 @@ static int uvd_v4_2_sw_fini(void *handle) if (r) return r; - r = amdgpu_uvd_sw_fini(adev); - if (r) - return r; - - return r; + return amdgpu_uvd_sw_fini(adev); } + static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, bool enable); /** @@ -230,11 +227,7 @@ static int uvd_v4_2_suspend(void *handle) if (r) return r; - r = amdgpu_uvd_suspend(adev); - if (r) - return r; - - return r; + return amdgpu_uvd_suspend(adev); } static int uvd_v4_2_resume(void *handle) @@ -246,11 +239,7 @@ static int uvd_v4_2_resume(void *handle) if (r) return r; - r = uvd_v4_2_hw_init(adev); - if (r) - return r; - - return r; + return uvd_v4_2_hw_init(adev); } /** @@ -367,7 +356,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0x0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); /* set the ring address */ WREG32(mmUVD_RBC_RB_BASE, ring->gpu_addr); @@ -770,6 +759,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v4_2_ring_get_rptr, .get_wptr = uvd_v4_2_ring_get_wptr, .set_wptr = uvd_v4_2_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ad8c02e423d4..bb6d46e168a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -51,7 +51,7 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -65,7 +65,7 @@ static uint32_t uvd_v5_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v5_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -83,7 +83,7 @@ static void uvd_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v5_0_early_init(void *handle) @@ -103,7 +103,7 @@ static int uvd_v5_0_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); if (r) return r; @@ -131,11 +131,7 @@ static int uvd_v5_0_sw_fini(void *handle) if (r) return r; - r = amdgpu_uvd_sw_fini(adev); - if (r) - return r; - - return r; + return amdgpu_uvd_sw_fini(adev); } /** @@ -228,11 +224,7 @@ static int uvd_v5_0_suspend(void *handle) return r; uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); - r = amdgpu_uvd_suspend(adev); - if (r) - return r; - - return r; + return amdgpu_uvd_suspend(adev); } static int uvd_v5_0_resume(void *handle) @@ -244,11 +236,7 @@ static int uvd_v5_0_resume(void *handle) if (r) return r; - r = uvd_v5_0_hw_init(adev); - if (r) - return r; - - return r; + return uvd_v5_0_hw_init(adev); } /** @@ -424,7 +412,7 @@ static int uvd_v5_0_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_P(mmUVD_RBC_RB_CNTL, 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); @@ -879,6 +867,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v5_0_ring_get_rptr, .get_wptr = uvd_v5_0_ring_get_wptr, .set_wptr = uvd_v5_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 18a6de4e1512..31db356476f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -54,7 +54,7 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, * * Returns the current hardware read pointer */ -static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -68,7 +68,7 @@ static uint32_t uvd_v6_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t uvd_v6_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -86,7 +86,7 @@ static void uvd_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } static int uvd_v6_0_early_init(void *handle) @@ -106,7 +106,7 @@ static int uvd_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, 124, &adev->uvd.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq); if (r) return r; @@ -134,11 +134,7 @@ static int uvd_v6_0_sw_fini(void *handle) if (r) return r; - r = amdgpu_uvd_sw_fini(adev); - if (r) - return r; - - return r; + return amdgpu_uvd_sw_fini(adev); } /** @@ -230,11 +226,8 @@ static int uvd_v6_0_suspend(void *handle) return r; /* Skip this for APU for now */ - if (!(adev->flags & AMD_IS_APU)) { + if (!(adev->flags & AMD_IS_APU)) r = amdgpu_uvd_suspend(adev); - if (r) - return r; - } return r; } @@ -250,11 +243,7 @@ static int uvd_v6_0_resume(void *handle) if (r) return r; } - r = uvd_v6_0_hw_init(adev); - if (r) - return r; - - return r; + return uvd_v6_0_hw_init(adev); } /** @@ -521,7 +510,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) WREG32(mmUVD_RBC_RB_RPTR, 0); ring->wptr = RREG32(mmUVD_RBC_RB_RPTR); - WREG32(mmUVD_RBC_RB_WPTR, ring->wptr); + WREG32(mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0); @@ -1068,8 +1057,12 @@ static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags) mutex_lock(&adev->pm.mutex); - if (RREG32_SMC(ixCURRENT_PG_STATUS) & - CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { + if (adev->flags & AMD_IS_APU) + data = RREG32_SMC(ixCURRENT_PG_STATUS_APU); + else + data = RREG32_SMC(ixCURRENT_PG_STATUS); + + if (data & CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) { DRM_INFO("Cannot get clockgating state when UVD is powergated.\n"); goto out; } @@ -1108,6 +1101,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, @@ -1134,6 +1128,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_UVD, .align_mask = 0xf, .nop = PACKET0(mmUVD_NO_OP, 0), + .support_64bit_ptrs = false, .get_rptr = uvd_v6_0_ring_get_rptr, .get_wptr = uvd_v6_0_ring_get_wptr, .set_wptr = uvd_v6_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c new file mode 100644 index 000000000000..9bcf01469282 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -0,0 +1,1532 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_uvd.h" +#include "soc15d.h" +#include "soc15_common.h" + +#include "vega10/soc15ip.h" +#include "vega10/UVD/uvd_7_0_offset.h" +#include "vega10/UVD/uvd_7_0_sh_mask.h" +#include "vega10/NBIF/nbif_6_1_offset.h" +#include "vega10/HDP/hdp_4_0_offset.h" +#include "vega10/MMHUB/mmhub_1_0_offset.h" +#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" + +static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev); +static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev); +static int uvd_v7_0_start(struct amdgpu_device *adev); +static void uvd_v7_0_stop(struct amdgpu_device *adev); + +/** + * uvd_v7_0_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); +} + +/** + * uvd_v7_0_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->uvd.ring_enc[0]) + return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR)); + else + return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2)); +} + +/** + * uvd_v7_0_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR)); +} + +/** + * uvd_v7_0_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->uvd.ring_enc[0]) + return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR)); + else + return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2)); +} + +/** + * uvd_v7_0_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), lower_32_bits(ring->wptr)); +} + +/** + * uvd_v7_0_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->uvd.ring_enc[0]) + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), + lower_32_bits(ring->wptr)); + else + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), + lower_32_bits(ring->wptr)); +} + +/** + * uvd_v7_0_enc_ring_test_ring - test if UVD ENC ring is working + * + * @ring: the engine to test on + * + */ +static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t rptr = amdgpu_ring_get_rptr(ring); + unsigned i; + int r; + + r = amdgpu_ring_alloc(ring, 16); + if (r) { + DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, HEVC_ENC_CMD_END); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (amdgpu_ring_get_rptr(ring) != rptr) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed\n", + ring->idx); + r = -ETIMEDOUT; + } + + return r; +} + +/** + * uvd_v7_0_enc_get_create_msg - generate a UVD ENC create msg + * + * @adev: amdgpu_device pointer + * @ring: ring we should submit the msg to + * @handle: session handle to use + * @fence: optional fence to return + * + * Open up a stream for HW test + */ +static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +/** + * uvd_v7_0_enc_get_destroy_msg - generate a UVD ENC destroy msg + * + * @adev: amdgpu_device pointer + * @ring: ring we should submit the msg to + * @handle: session handle to use + * @fence: optional fence to return + * + * Close up a stream for HW test or if userspace failed to do so + */ +int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + if (direct) { + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + } else { + r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err; + } + + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +/** + * uvd_v7_0_enc_ring_test_ib - test if UVD ENC IBs are working + * + * @ring: the engine to test on + * + */ +static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence = NULL; + long r; + + r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto error; + } + + r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } +error: + dma_fence_put(fence); + return r; +} + +static int uvd_v7_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->uvd.num_enc_rings = 2; + uvd_v7_0_set_ring_funcs(adev); + uvd_v7_0_set_enc_ring_funcs(adev); + uvd_v7_0_set_irq_funcs(adev); + + return 0; +} + +static int uvd_v7_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* UVD TRAP */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, 124, &adev->uvd.irq); + if (r) + return r; + + /* UVD ENC TRAP */ + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq); + if (r) + return r; + } + + r = amdgpu_uvd_sw_init(adev); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->uvd.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].ucode_id = AMDGPU_UCODE_ID_UVD; + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].fw = adev->uvd.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + DRM_INFO("PSP loading UVD firmware\n"); + } + + ring = &adev->uvd.ring_enc[0]; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc, + rq, amdgpu_sched_jobs); + if (r) { + DRM_ERROR("Failed setting up UVD ENC run queue.\n"); + return r; + } + + r = amdgpu_uvd_resume(adev); + if (r) + return r; + + ring = &adev->uvd.ring; + sprintf(ring->name, "uvd"); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + if (r) + return r; + + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.ring_enc[i]; + sprintf(ring->name, "uvd_enc%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0); + if (r) + return r; + } + + return r; +} + +static int uvd_v7_0_sw_fini(void *handle) +{ + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_uvd_suspend(adev); + if (r) + return r; + + amd_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc); + + for (i = 0; i < adev->uvd.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->uvd.ring_enc[i]); + + return amdgpu_uvd_sw_fini(adev); +} + +/** + * uvd_v7_0_hw_init - start and test UVD block + * + * @adev: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int uvd_v7_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->uvd.ring; + uint32_t tmp; + int i, r; + + r = uvd_v7_0_start(adev); + if (r) + goto done; + + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); + goto done; + } + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_TIMEOUT_STATUS), 0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, + mmUVD_SEMA_CNTL), 0)); + amdgpu_ring_write(ring, 3); + + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.ring_enc[i]; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + } + +done: + if (!r) + DRM_INFO("UVD and UVD ENC initialized successfully.\n"); + + return r; +} + +/** + * uvd_v7_0_hw_fini - stop the hardware block + * + * @adev: amdgpu_device pointer + * + * Stop the UVD block, mark ring as not ready any more + */ +static int uvd_v7_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->uvd.ring; + + uvd_v7_0_stop(adev); + ring->ready = false; + + return 0; +} + +static int uvd_v7_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = uvd_v7_0_hw_fini(adev); + if (r) + return r; + + /* Skip this for APU for now */ + if (!(adev->flags & AMD_IS_APU)) + r = amdgpu_uvd_suspend(adev); + + return r; +} + +static int uvd_v7_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* Skip this for APU for now */ + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_uvd_resume(adev); + if (r) + return r; + } + return uvd_v7_0_hw_init(adev); +} + +/** + * uvd_v7_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * + * Let the UVD memory controller know it's offsets + */ +static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); + uint32_t offset; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + offset = 0; + } else { + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.gpu_addr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.gpu_addr)); + offset = size; + } + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.gpu_addr + offset)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.gpu_addr + offset)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), + adev->gfx.config.gb_addr_config); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), + adev->gfx.config.gb_addr_config); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), + adev->gfx.config.gb_addr_config); + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); +} + +/** + * uvd_v7_0_start - start UVD block + * + * @adev: amdgpu_device pointer + * + * Setup and start the UVD block + */ +static int uvd_v7_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->uvd.ring; + uint32_t rb_bufsz, tmp; + uint32_t lmi_swap_cntl; + uint32_t mp_swap_cntl; + int i, j, r; + + /* disable DPG */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + /* disable byte swapping */ + lmi_swap_cntl = 0; + mp_swap_cntl = 0; + + uvd_v7_0_mc_resume(adev); + + /* disable clock gating */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0, + ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK); + + /* disable interupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); + + /* put LMI, VCPU, RBC etc... into reset */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | + UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | + UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | + UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | + UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | + UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); + mdelay(5); + + /* initialize UVD memory controller */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), + (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + 0x00100000L); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; + mp_swap_cntl = 0; +#endif + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), lmi_swap_cntl); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), mp_swap_cntl); + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); + + /* take all subblocks out of reset, except VCPU */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); + + /* enable VCPU clock */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK); + + /* enable UMC */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* boot up the VCPU */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); + mdelay(10); + + for (i = 0; i < 10; ++i) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS)); + if (status & 2) + break; + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("UVD not responding, giving up!!!\n"); + return r; + } + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), + (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), + ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); + + /* clear the bit 4 of UVD_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); + + /* set the write pointer delay */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); + + /* set the wb address */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR), + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + lower_32_bits(ring->gpu_addr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR), 0); + + ring->wptr = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), + lower_32_bits(ring->wptr)); + + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, + ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); + + ring = &adev->uvd.ring_enc[0]; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); + + ring = &adev->uvd.ring_enc[1]; + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4); + + return 0; +} + +/** + * uvd_v7_0_stop - stop UVD block + * + * @adev: amdgpu_device pointer + * + * stop the UVD block + */ +static void uvd_v7_0_stop(struct amdgpu_device *adev) +{ + /* force RBC into idle state */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0x11010101); + + /* Stall UMC and register bus before resetting VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), + UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + mdelay(1); + + /* put VCPU into reset */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + mdelay(5); + + /* disable VCPU clock */ + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0x0); + + /* Unstall UMC and register bus */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); +} + +/** + * uvd_v7_0_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, addr & 0xffffffff); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 2); +} + +/** + * uvd_v7_0_enc_ring_emit_fence - emit an enc fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write enc a fence and a trap command to the ring. + */ +static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, HEVC_ENC_CMD_FENCE); + amdgpu_ring_write(ring, addr); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, HEVC_ENC_CMD_TRAP); +} + +/** + * uvd_v7_0_ring_emit_hdp_flush - emit an hdp flush + * + * @ring: amdgpu_ring pointer + * + * Emits an hdp flush. + */ +static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(NBIF, 0, + mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0)); + amdgpu_ring_write(ring, 0); +} + +/** + * uvd_v7_0_ring_hdp_invalidate - emit an hdp invalidate + * + * @ring: amdgpu_ring pointer + * + * Emits an hdp invalidate. + */ +static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0)); + amdgpu_ring_write(ring, 1); +} + +/** + * uvd_v7_0_ring_test_ring - register write test + * + * @ring: amdgpu_ring pointer + * + * Test if we can successfully write to the context register + */ +static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +/** + * uvd_v7_0_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer + */ +static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); + amdgpu_ring_write(ring, vm_id); + + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0)); + amdgpu_ring_write(ring, ib->length_dw); +} + +/** + * uvd_v7_0_enc_ring_emit_ib - enc execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write enc ring commands to execute the indirect buffer + */ +static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) +{ + amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); + amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring, + uint32_t data0, uint32_t data1) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, data0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, data1); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 8); +} + +static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring, + uint32_t data0, uint32_t data1, uint32_t mask) +{ + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); + amdgpu_ring_write(ring, data0); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); + amdgpu_ring_write(ring, data1); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0)); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); + amdgpu_ring_write(ring, 12); +} + +static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) +{ + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + uint32_t data0, data1, mask; + unsigned eng = ring->idx; + unsigned i; + + pd_addr = pd_addr | 0x1; /* valid bit */ + /* now only use physical base address of PDE and valid */ + BUG_ON(pd_addr & 0xFFFF00000000003EULL); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; + + data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; + data1 = upper_32_bits(pd_addr); + uvd_v7_0_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + uvd_v7_0_vm_reg_write(ring, data0, data1); + + data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); + + /* flush TLB */ + data0 = (hub->vm_inv_eng0_req + eng) << 2; + data1 = req; + uvd_v7_0_vm_reg_write(ring, data0, data1); + + /* wait for flush */ + data0 = (hub->vm_inv_eng0_ack + eng) << 2; + data1 = 1 << vm_id; + mask = 1 << vm_id; + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); + } +} + +static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, HEVC_ENC_CMD_END); +} + +static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vm_id, uint64_t pd_addr) +{ + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + unsigned eng = ring->idx; + unsigned i; + + pd_addr = pd_addr | 0x1; /* valid bit */ + /* now only use physical base address of PDE and valid */ + BUG_ON(pd_addr & 0xFFFF00000000003EULL); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; + + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); + } +} + +#if 0 +static bool uvd_v7_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); +} + +static int uvd_v7_0_wait_for_idle(void *handle) +{ + unsigned i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (uvd_v7_0_is_idle(handle)) + return 0; + } + return -ETIMEDOUT; +} + +#define AMDGPU_UVD_STATUS_BUSY_MASK 0xfd +static bool uvd_v7_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; + u32 tmp = RREG32(mmSRBM_STATUS); + + if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || + REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || + (RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS) & + AMDGPU_UVD_STATUS_BUSY_MASK))) + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, + SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); + + if (srbm_soft_reset) { + adev->uvd.srbm_soft_reset = srbm_soft_reset; + return true; + } else { + adev->uvd.srbm_soft_reset = 0; + return false; + } +} + +static int uvd_v7_0_pre_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->uvd.srbm_soft_reset) + return 0; + + uvd_v7_0_stop(adev); + return 0; +} + +static int uvd_v7_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset; + + if (!adev->uvd.srbm_soft_reset) + return 0; + srbm_soft_reset = adev->uvd.srbm_soft_reset; + + if (srbm_soft_reset) { + u32 tmp; + + tmp = RREG32(mmSRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~srbm_soft_reset; + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + /* Wait a little for things to settle down */ + udelay(50); + } + + return 0; +} + +static int uvd_v7_0_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->uvd.srbm_soft_reset) + return 0; + + mdelay(5); + + return uvd_v7_0_start(adev); +} +#endif + +static int uvd_v7_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + // TODO + return 0; +} + +static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: UVD TRAP\n"); + switch (entry->src_id) { + case 124: + amdgpu_fence_process(&adev->uvd.ring); + break; + case 119: + amdgpu_fence_process(&adev->uvd.ring_enc[0]); + break; + case 120: + amdgpu_fence_process(&adev->uvd.ring_enc[1]); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +#if 0 +static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data, data1, data2, suvd_flags; + + data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL)); + data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); + data2 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL)); + + data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | + UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); + + suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK | + UVD_SUVD_CGC_GATE__SIT_MASK | + UVD_SUVD_CGC_GATE__SMP_MASK | + UVD_SUVD_CGC_GATE__SCM_MASK | + UVD_SUVD_CGC_GATE__SDB_MASK; + + data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK | + (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) | + (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY)); + + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__JPEG_MODE_MASK | + UVD_CGC_CTRL__JPEG2_MODE_MASK | + UVD_CGC_CTRL__SCPU_MODE_MASK); + data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | + UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | + UVD_SUVD_CGC_CTRL__SMP_MODE_MASK | + UVD_SUVD_CGC_CTRL__SCM_MODE_MASK | + UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); + data1 |= suvd_flags; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), data); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), 0); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL), data2); +} + +static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data, data1, cgc_flags, suvd_flags; + + data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE)); + data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); + + cgc_flags = UVD_CGC_GATE__SYS_MASK | + UVD_CGC_GATE__UDEC_MASK | + UVD_CGC_GATE__MPEG2_MASK | + UVD_CGC_GATE__RBC_MASK | + UVD_CGC_GATE__LMI_MC_MASK | + UVD_CGC_GATE__IDCT_MASK | + UVD_CGC_GATE__MPRD_MASK | + UVD_CGC_GATE__MPC_MASK | + UVD_CGC_GATE__LBSI_MASK | + UVD_CGC_GATE__LRBBM_MASK | + UVD_CGC_GATE__UDEC_RE_MASK | + UVD_CGC_GATE__UDEC_CM_MASK | + UVD_CGC_GATE__UDEC_IT_MASK | + UVD_CGC_GATE__UDEC_DB_MASK | + UVD_CGC_GATE__UDEC_MP_MASK | + UVD_CGC_GATE__WCB_MASK | + UVD_CGC_GATE__VCPU_MASK | + UVD_CGC_GATE__SCPU_MASK | + UVD_CGC_GATE__JPEG_MASK | + UVD_CGC_GATE__JPEG2_MASK; + + suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK | + UVD_SUVD_CGC_GATE__SIT_MASK | + UVD_SUVD_CGC_GATE__SMP_MASK | + UVD_SUVD_CGC_GATE__SCM_MASK | + UVD_SUVD_CGC_GATE__SDB_MASK; + + data |= cgc_flags; + data1 |= suvd_flags; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), data); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); +} + +static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) +{ + u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL); + + if (enable) + tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK | + GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK); + else + tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK | + GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK); + + WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp); +} + + +static int uvd_v7_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + uvd_v7_0_set_bypass_mode(adev, enable); + + if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) + return 0; + + if (enable) { + /* disable HW gating and enable Sw gating */ + uvd_v7_0_set_sw_clock_gating(adev); + } else { + /* wait for STATUS to clear */ + if (uvd_v7_0_wait_for_idle(handle)) + return -EBUSY; + + /* enable HW gates because UVD is idle */ + /* uvd_v7_0_set_hw_clock_gating(adev); */ + } + + return 0; +} + +static int uvd_v7_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* This doesn't actually powergate the UVD block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) + return 0; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), UVD_POWER_STATUS__UVD_PG_EN_MASK); + + if (state == AMD_PG_STATE_GATE) { + uvd_v7_0_stop(adev); + return 0; + } else { + return uvd_v7_0_start(adev); + } +} +#endif + +static int uvd_v7_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + /* needed for driver unload*/ + return 0; +} + +const struct amd_ip_funcs uvd_v7_0_ip_funcs = { + .name = "uvd_v7_0", + .early_init = uvd_v7_0_early_init, + .late_init = NULL, + .sw_init = uvd_v7_0_sw_init, + .sw_fini = uvd_v7_0_sw_fini, + .hw_init = uvd_v7_0_hw_init, + .hw_fini = uvd_v7_0_hw_fini, + .suspend = uvd_v7_0_suspend, + .resume = uvd_v7_0_resume, + .is_idle = NULL /* uvd_v7_0_is_idle */, + .wait_for_idle = NULL /* uvd_v7_0_wait_for_idle */, + .check_soft_reset = NULL /* uvd_v7_0_check_soft_reset */, + .pre_soft_reset = NULL /* uvd_v7_0_pre_soft_reset */, + .soft_reset = NULL /* uvd_v7_0_soft_reset */, + .post_soft_reset = NULL /* uvd_v7_0_post_soft_reset */, + .set_clockgating_state = uvd_v7_0_set_clockgating_state, + .set_powergating_state = NULL /* uvd_v7_0_set_powergating_state */, +}; + +static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_UVD, + .align_mask = 0xf, + .nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0), + .support_64bit_ptrs = false, + .get_rptr = uvd_v7_0_ring_get_rptr, + .get_wptr = uvd_v7_0_ring_get_wptr, + .set_wptr = uvd_v7_0_ring_set_wptr, + .emit_frame_size = + 2 + /* uvd_v7_0_ring_emit_hdp_flush */ + 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ + 34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */ + 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ + .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ + .emit_ib = uvd_v7_0_ring_emit_ib, + .emit_fence = uvd_v7_0_ring_emit_fence, + .emit_vm_flush = uvd_v7_0_ring_emit_vm_flush, + .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush, + .emit_hdp_invalidate = uvd_v7_0_ring_emit_hdp_invalidate, + .test_ring = uvd_v7_0_ring_test_ring, + .test_ib = amdgpu_uvd_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_uvd_ring_begin_use, + .end_use = amdgpu_uvd_ring_end_use, +}; + +static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_UVD_ENC, + .align_mask = 0x3f, + .nop = HEVC_ENC_CMD_NO_OP, + .support_64bit_ptrs = false, + .get_rptr = uvd_v7_0_enc_ring_get_rptr, + .get_wptr = uvd_v7_0_enc_ring_get_wptr, + .set_wptr = uvd_v7_0_enc_ring_set_wptr, + .emit_frame_size = + 17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ + 1, /* uvd_v7_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ + .emit_ib = uvd_v7_0_enc_ring_emit_ib, + .emit_fence = uvd_v7_0_enc_ring_emit_fence, + .emit_vm_flush = uvd_v7_0_enc_ring_emit_vm_flush, + .test_ring = uvd_v7_0_enc_ring_test_ring, + .test_ib = uvd_v7_0_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = uvd_v7_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_uvd_ring_begin_use, + .end_use = amdgpu_uvd_ring_end_use, +}; + +static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev) +{ + adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs; + DRM_INFO("UVD is enabled in VM mode\n"); +} + +static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->uvd.num_enc_rings; ++i) + adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs; + + DRM_INFO("UVD ENC is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = { + .set = uvd_v7_0_set_interrupt_state, + .process = uvd_v7_0_process_interrupt, +}; + +static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1; + adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs; +} + +const struct amdgpu_ip_block_version uvd_v7_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &uvd_v7_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h new file mode 100644 index 000000000000..cbe82ab3224f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __UVD_V7_0_H__ +#define __UVD_V7_0_H__ + +extern const struct amdgpu_ip_block_version uvd_v7_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 9ea99348e493..47f70827195b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -52,7 +52,7 @@ static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev); * * Returns the current hardware read pointer */ -static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -69,7 +69,7 @@ static uint32_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -91,9 +91,9 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->vce.ring[0]) - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); } static int vce_v2_0_lmi_clean(struct amdgpu_device *adev) @@ -167,8 +167,7 @@ static void vce_v2_0_init_cg(struct amdgpu_device *adev) static void vce_v2_0_mc_resume(struct amdgpu_device *adev) { - uint64_t addr = adev->vce.gpu_addr; - uint32_t size; + uint32_t size, offset; WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16)); WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); @@ -181,19 +180,21 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev) WREG32(mmVCE_LMI_SWAP_CNTL1, 0); WREG32(mmVCE_LMI_VM_CTRL, 0); - addr += AMDGPU_VCE_FIRMWARE_OFFSET; + WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8)); + + offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V2_0_FW_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE0, size); - addr += size; + offset += size; size = VCE_V2_0_STACK_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_OFFSET1, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE1, size); - addr += size; + offset += size; size = VCE_V2_0_DATA_SIZE; - WREG32(mmVCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); + WREG32(mmVCE_VCPU_CACHE_OFFSET2, offset & 0x7fffffff); WREG32(mmVCE_VCPU_CACHE_SIZE2, size); WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100); @@ -240,15 +241,15 @@ static int vce_v2_0_start(struct amdgpu_device *adev) vce_v2_0_mc_resume(adev); ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, ring->wptr); - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, ring->wptr); - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); @@ -273,24 +274,14 @@ static int vce_v2_0_start(struct amdgpu_device *adev) static int vce_v2_0_stop(struct amdgpu_device *adev) { - int i, j; + int i; int status; if (vce_v2_0_lmi_clean(adev)) { DRM_INFO("vce is not idle \n"); return 0; } -/* - for (i = 0; i < 10; ++i) { - for (j = 0; j < 100; ++j) { - status = RREG32(mmVCE_FW_REG_STATUS); - if (!(status & 1)) - break; - mdelay(1); - } - break; - } -*/ + if (vce_v2_0_wait_for_idle(adev)) { DRM_INFO("VCE is busy, Can't set clock gateing"); return 0; @@ -299,14 +290,11 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) /* Stall UMC and register bus before resetting VCPU */ WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8)); - for (i = 0; i < 10; ++i) { - for (j = 0; j < 100; ++j) { - status = RREG32(mmVCE_LMI_STATUS); - if (status & 0x240) - break; - mdelay(1); - } - break; + for (i = 0; i < 100; ++i) { + status = RREG32(mmVCE_LMI_STATUS); + if (status & 0x240) + break; + mdelay(1); } WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x80001); @@ -429,7 +417,7 @@ static int vce_v2_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCE */ - r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq); if (r) return r; @@ -463,11 +451,7 @@ static int vce_v2_0_sw_fini(void *handle) if (r) return r; - r = amdgpu_vce_sw_fini(adev); - if (r) - return r; - - return r; + return amdgpu_vce_sw_fini(adev); } static int vce_v2_0_hw_init(void *handle) @@ -507,11 +491,7 @@ static int vce_v2_0_suspend(void *handle) if (r) return r; - r = amdgpu_vce_suspend(adev); - if (r) - return r; - - return r; + return amdgpu_vce_suspend(adev); } static int vce_v2_0_resume(void *handle) @@ -523,11 +503,7 @@ static int vce_v2_0_resume(void *handle) if (r) return r; - r = vce_v2_0_hw_init(adev); - if (r) - return r; - - return r; + return vce_v2_0_hw_init(adev); } static int vce_v2_0_soft_reset(void *handle) @@ -559,14 +535,14 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_DEBUG("IH: VCE\n"); - switch (entry->src_data) { + switch (entry->src_data[0]) { case 0: case 1: - amdgpu_fence_process(&adev->vce.ring[entry->src_data]); + amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", - entry->src_id, entry->src_data); + entry->src_id, entry->src_data[0]); break; } @@ -630,6 +606,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .type = AMDGPU_RING_TYPE_VCE, .align_mask = 0xf, .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, .get_rptr = vce_v2_0_ring_get_rptr, .get_wptr = vce_v2_0_ring_get_wptr, .set_wptr = vce_v2_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 93ec8815bb13..fb0819359909 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -65,7 +65,8 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx); static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev); static int vce_v3_0_wait_for_idle(void *handle); - +static int vce_v3_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state); /** * vce_v3_0_ring_get_rptr - get read pointer * @@ -73,7 +74,7 @@ static int vce_v3_0_wait_for_idle(void *handle); * * Returns the current hardware read pointer */ -static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -92,7 +93,7 @@ static uint32_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) * * Returns the current hardware write pointer */ -static uint32_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -116,11 +117,11 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->vce.ring[0]) - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else if (ring == &adev->vce.ring[1]) - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); else - WREG32(mmVCE_RB_WPTR3, ring->wptr); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); } static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) @@ -231,22 +232,22 @@ static int vce_v3_0_start(struct amdgpu_device *adev) int idx, r; ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, ring->wptr); - WREG32(mmVCE_RB_WPTR, ring->wptr); + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, ring->wptr); - WREG32(mmVCE_RB_WPTR2, ring->wptr); + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); ring = &adev->vce.ring[2]; - WREG32(mmVCE_RB_RPTR3, ring->wptr); - WREG32(mmVCE_RB_WPTR3, ring->wptr); + WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); @@ -305,12 +306,8 @@ static int vce_v3_0_stop(struct amdgpu_device *adev) /* hold on ECPU */ WREG32_FIELD(VCE_SOFT_RESET, ECPU_SOFT_RESET, 1); - /* clear BUSY flag */ - WREG32_FIELD(VCE_STATUS, JOB_BUSY, 0); - - /* Set Clock-Gating off */ - if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) - vce_v3_0_set_vce_sw_clock_gating(adev, false); + /* clear VCE STATUS */ + WREG32(mmVCE_STATUS, 0); } WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); @@ -383,7 +380,7 @@ static int vce_v3_0_sw_init(void *handle) int r, i; /* VCE */ - r = amdgpu_irq_add_id(adev, 167, &adev->vce.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq); if (r) return r; @@ -420,11 +417,7 @@ static int vce_v3_0_sw_fini(void *handle) if (r) return r; - r = amdgpu_vce_sw_fini(adev); - if (r) - return r; - - return r; + return amdgpu_vce_sw_fini(adev); } static int vce_v3_0_hw_init(void *handle) @@ -461,7 +454,8 @@ static int vce_v3_0_hw_fini(void *handle) if (r) return r; - return vce_v3_0_stop(adev); + vce_v3_0_stop(adev); + return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); } static int vce_v3_0_suspend(void *handle) @@ -473,11 +467,7 @@ static int vce_v3_0_suspend(void *handle) if (r) return r; - r = amdgpu_vce_suspend(adev); - if (r) - return r; - - return r; + return amdgpu_vce_suspend(adev); } static int vce_v3_0_resume(void *handle) @@ -489,11 +479,7 @@ static int vce_v3_0_resume(void *handle) if (r) return r; - r = vce_v3_0_hw_init(adev); - if (r) - return r; - - return r; + return vce_v3_0_hw_init(adev); } static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) @@ -695,15 +681,15 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev, WREG32_FIELD(VCE_SYS_INT_STATUS, VCE_SYS_INT_TRAP_INTERRUPT_INT, 1); - switch (entry->src_data) { + switch (entry->src_data[0]) { case 0: case 1: case 2: - amdgpu_fence_process(&adev->vce.ring[entry->src_data]); + amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", - entry->src_id, entry->src_data); + entry->src_id, entry->src_data[0]); break; } @@ -728,7 +714,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(i)); - if (enable) { + if (!enable) { /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */ uint32_t data = RREG32(mmVCE_CLOCK_GATING_A); data &= ~(0xf | 0xff0); @@ -785,8 +771,12 @@ static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags) mutex_lock(&adev->pm.mutex); - if (RREG32_SMC(ixCURRENT_PG_STATUS) & - CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) { + if (adev->flags & AMD_IS_APU) + data = RREG32_SMC(ixCURRENT_PG_STATUS_APU); + else + data = RREG32_SMC(ixCURRENT_PG_STATUS); + + if (data & CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) { DRM_INFO("Cannot get clockgating state when VCE is powergated.\n"); goto out; } @@ -860,6 +850,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .type = AMDGPU_RING_TYPE_VCE, .align_mask = 0xf, .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, @@ -882,6 +873,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCE, .align_mask = 0xf, .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, .get_rptr = vce_v3_0_ring_get_rptr, .get_wptr = vce_v3_0_ring_get_wptr, .set_wptr = vce_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c new file mode 100644 index 000000000000..edde5fe938d6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -0,0 +1,1129 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_vce.h" +#include "soc15d.h" +#include "soc15_common.h" +#include "mmsch_v1_0.h" + +#include "vega10/soc15ip.h" +#include "vega10/VCE/vce_4_0_offset.h" +#include "vega10/VCE/vce_4_0_default.h" +#include "vega10/VCE/vce_4_0_sh_mask.h" +#include "vega10/MMHUB/mmhub_1_0_offset.h" +#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" + +#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02 + +#define VCE_V4_0_FW_SIZE (384 * 1024) +#define VCE_V4_0_STACK_SIZE (64 * 1024) +#define VCE_V4_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024)) + +static void vce_v4_0_mc_resume(struct amdgpu_device *adev); +static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev); +static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev); + +static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt, + uint32_t *init_table, + uint32_t reg_offset, + uint32_t value) +{ + direct_wt->cmd_header.reg_offset = reg_offset; + direct_wt->reg_value = value; + memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write)); +} + +static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt, + uint32_t *init_table, + uint32_t reg_offset, + uint32_t mask, uint32_t data) +{ + direct_rd_mod_wt->cmd_header.reg_offset = reg_offset; + direct_rd_mod_wt->mask_value = mask; + direct_rd_mod_wt->write_data = data; + memcpy((void *)init_table, direct_rd_mod_wt, + sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)); +} + +static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll, + uint32_t *init_table, + uint32_t reg_offset, + uint32_t mask, uint32_t wait) +{ + direct_poll->cmd_header.reg_offset = reg_offset; + direct_poll->mask_value = mask; + direct_poll->wait_value = wait; + memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling)); +} + +#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ + mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \ + init_table, (reg), \ + (mask), (data)); \ + init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ + table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ +} + +#define INSERT_DIRECT_WT(reg, value) { \ + mmsch_insert_direct_wt(&direct_wt, \ + init_table, (reg), \ + (value)); \ + init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ + table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ +} + +#define INSERT_DIRECT_POLL(reg, mask, wait) { \ + mmsch_insert_direct_poll(&direct_poll, \ + init_table, (reg), \ + (mask), (wait)); \ + init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ + table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ +} + +/** + * vce_v4_0_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vce.ring[0]) + return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR)); + else if (ring == &adev->vce.ring[1]) + return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2)); + else + return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3)); +} + +/** + * vce_v4_0_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + + if (ring == &adev->vce.ring[0]) + return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR)); + else if (ring == &adev->vce.ring[1]) + return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2)); + else + return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3)); +} + +/** + * vce_v4_0_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + return; + } + + if (ring == &adev->vce.ring[0]) + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), + lower_32_bits(ring->wptr)); + else if (ring == &adev->vce.ring[1]) + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), + lower_32_bits(ring->wptr)); + else + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), + lower_32_bits(ring->wptr)); +} + +static int vce_v4_0_firmware_loaded(struct amdgpu_device *adev) +{ + int i, j; + + for (i = 0; i < 10; ++i) { + for (j = 0; j < 100; ++j) { + uint32_t status = + RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS)); + + if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK) + return 0; + mdelay(10); + } + + DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), + VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + mdelay(10); + + } + + return -ETIMEDOUT; +} + +static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, + struct amdgpu_mm_table *table) +{ + uint32_t data = 0, loop; + uint64_t addr = table->gpu_addr; + struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; + uint32_t size; + + size = header->header_size + header->vce_table_size + header->uvd_table_size; + + /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); + + /* 2, update vmid of descriptor */ + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); + data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; + data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); + + /* 3, notify mmsch about the size of this descriptor */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); + + /* 4, set resp to zero */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); + + /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + loop = 1000; + while ((data & 0x10000002) != 0x10000002) { + udelay(10); + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + loop--; + if (!loop) + break; + } + + if (!loop) { + dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); + return -EBUSY; + } + + return 0; +} + +static int vce_v4_0_sriov_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t offset, size; + uint32_t table_size = 0; + struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } }; + struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } }; + struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } }; + struct mmsch_v1_0_cmd_end end = { { 0 } }; + uint32_t *init_table = adev->virt.mm_table.cpu_addr; + struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; + + direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING; + end.cmd_header.command_type = MMSCH_COMMAND__END; + + if (header->vce_table_offset == 0 && header->vce_table_size == 0) { + header->version = MMSCH_VERSION; + header->header_size = sizeof(struct mmsch_v1_0_init_header) >> 2; + + if (header->uvd_table_offset == 0 && header->uvd_table_size == 0) + header->vce_table_offset = header->header_size; + else + header->vce_table_offset = header->uvd_table_size + header->uvd_table_offset; + + init_table += header->vce_table_offset; + + ring = &adev->vce.ring[0]; + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr)); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); + + /* BEGING OF MC_RESUME */ + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0); + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000); + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); + + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); + + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8); + + offset = AMDGPU_VCE_FIRMWARE_OFFSET; + size = VCE_V4_0_FW_SIZE; + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); + + offset += size; + size = VCE_V4_0_STACK_SIZE; + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); + + offset += size; + size = VCE_V4_0_DATA_SIZE; + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF); + INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); + + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), + 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + + /* end of MC_RESUME */ + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), + ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); + + INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); + + /* clear BUSY flag */ + INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + ~VCE_STATUS__JOB_BUSY_MASK, 0); + + /* add end packet */ + memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); + table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4; + header->vce_table_size = table_size; + + return vce_v4_0_mmsch_start(adev, &adev->virt.mm_table); + } + + return -EINVAL; /* already initializaed ? */ +} + +/** + * vce_v4_0_start - start VCE block + * + * @adev: amdgpu_device pointer + * + * Setup and start the VCE block + */ +static int vce_v4_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r; + + ring = &adev->vce.ring[0]; + + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), ring->gpu_addr); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); + + ring = &adev->vce.ring[1]; + + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO2), ring->gpu_addr); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE2), ring->ring_size / 4); + + ring = &adev->vce.ring[2]; + + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3), lower_32_bits(ring->wptr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO3), ring->gpu_addr); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI3), upper_32_bits(ring->gpu_addr)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE3), ring->ring_size / 4); + + vce_v4_0_mc_resume(adev); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), VCE_STATUS__JOB_BUSY_MASK, + ~VCE_STATUS__JOB_BUSY_MASK); + + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 1, ~0x200001); + + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), 0, + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + mdelay(100); + + r = vce_v4_0_firmware_loaded(adev); + + /* clear BUSY flag */ + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); + + if (r) { + DRM_ERROR("VCE not responding, giving up!!!\n"); + return r; + } + + return 0; +} + +static int vce_v4_0_stop(struct amdgpu_device *adev) +{ + + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), 0, ~0x200001); + + /* hold on ECPU */ + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), + VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + + /* clear BUSY flag */ + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), 0, ~VCE_STATUS__JOB_BUSY_MASK); + + /* Set Clock-Gating off */ + /* if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG) + vce_v4_0_set_vce_sw_clock_gating(adev, false); + */ + + return 0; +} + +static int vce_v4_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) /* currently only VCN0 support SRIOV */ + adev->vce.num_rings = 1; + else + adev->vce.num_rings = 3; + + vce_v4_0_set_ring_funcs(adev); + vce_v4_0_set_irq_funcs(adev); + + return 0; +} + +static int vce_v4_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + unsigned size; + int r, i; + + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq); + if (r) + return r; + + size = (VCE_V4_0_STACK_SIZE + VCE_V4_0_DATA_SIZE) * 2; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + size += VCE_V4_0_FW_SIZE; + + r = amdgpu_vce_sw_init(adev, size); + if (r) + return r; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + const struct common_firmware_header *hdr; + hdr = (const struct common_firmware_header *)adev->vce.fw->data; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + DRM_INFO("PSP loading VCE firmware\n"); + } + + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + r = amdgpu_vce_resume(adev); + if (r) + return r; + } + + for (i = 0; i < adev->vce.num_rings; i++) { + ring = &adev->vce.ring[i]; + sprintf(ring->name, "vce%d", i); + if (amdgpu_sriov_vf(adev)) { + /* DOORBELL only works under SRIOV */ + ring->use_doorbell = true; + if (i == 0) + ring->doorbell_index = AMDGPU_DOORBELL64_RING0_1 * 2; + else if (i == 1) + ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2; + else + ring->doorbell_index = AMDGPU_DOORBELL64_RING2_3 * 2 + 1; + } + r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); + if (r) + return r; + } + + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->virt.mm_table.bo, + &adev->virt.mm_table.gpu_addr, + (void *)&adev->virt.mm_table.cpu_addr); + if (!r) { + memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE); + printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n", + adev->virt.mm_table.gpu_addr, + adev->virt.mm_table.cpu_addr); + } + return r; + } + + return r; +} + +static int vce_v4_0_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* free MM table */ + if (amdgpu_sriov_vf(adev)) + amdgpu_bo_free_kernel(&adev->virt.mm_table.bo, + &adev->virt.mm_table.gpu_addr, + (void *)&adev->virt.mm_table.cpu_addr); + + r = amdgpu_vce_suspend(adev); + if (r) + return r; + + return amdgpu_vce_sw_fini(adev); +} + +static int vce_v4_0_hw_init(void *handle) +{ + int r, i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (amdgpu_sriov_vf(adev)) + r = vce_v4_0_sriov_start(adev); + else + r = vce_v4_0_start(adev); + if (r) + return r; + + for (i = 0; i < adev->vce.num_rings; i++) + adev->vce.ring[i].ready = false; + + for (i = 0; i < adev->vce.num_rings; i++) { + r = amdgpu_ring_test_ring(&adev->vce.ring[i]); + if (r) + return r; + else + adev->vce.ring[i].ready = true; + } + + DRM_INFO("VCE initialized successfully.\n"); + + return 0; +} + +static int vce_v4_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + /* vce_v4_0_wait_for_idle(handle); */ + vce_v4_0_stop(adev); + for (i = 0; i < adev->vce.num_rings; i++) + adev->vce.ring[i].ready = false; + + return 0; +} + +static int vce_v4_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vce_v4_0_hw_fini(adev); + if (r) + return r; + + return amdgpu_vce_suspend(adev); +} + +static int vce_v4_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vce_resume(adev); + if (r) + return r; + + return vce_v4_0_hw_init(adev); +} + +static void vce_v4_0_mc_resume(struct amdgpu_device *adev) +{ + uint32_t offset, size; + + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16)); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), 0x3F, ~0x3F); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); + + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x00398000); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), 0x0, ~0x1); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); + } else { + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), + (adev->vce.gpu_addr >> 8)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), + (adev->vce.gpu_addr >> 40) & 0xff); + } + + offset = AMDGPU_VCE_FIRMWARE_OFFSET; + size = VCE_V4_0_FW_SIZE; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); + + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR1), (adev->vce.gpu_addr >> 40) & 0xff); + offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; + size = VCE_V4_0_STACK_SIZE; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), (offset & ~0x0f000000) | (1 << 24)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); + + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), (adev->vce.gpu_addr >> 8)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff); + offset += size; + size = VCE_V4_0_DATA_SIZE; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), (offset & ~0x0f000000) | (2 << 24)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); + + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), 0x0, ~0x100); + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), + VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK, + ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); +} + +static int vce_v4_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + /* needed for driver unload*/ + return 0; +} + +#if 0 +static bool vce_v4_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 mask = 0; + + mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK; + mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE1) ? 0 : SRBM_STATUS2__VCE1_BUSY_MASK; + + return !(RREG32(mmSRBM_STATUS2) & mask); +} + +static int vce_v4_0_wait_for_idle(void *handle) +{ + unsigned i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) + if (vce_v4_0_is_idle(handle)) + return 0; + + return -ETIMEDOUT; +} + +#define VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK 0x00000008L /* AUTO_BUSY */ +#define VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK 0x00000010L /* RB0_BUSY */ +#define VCE_STATUS_VCPU_REPORT_RB1_BUSY_MASK 0x00000020L /* RB1_BUSY */ +#define AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \ + VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK) + +static bool vce_v4_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset = 0; + + /* According to VCE team , we should use VCE_STATUS instead + * SRBM_STATUS.VCE_BUSY bit for busy status checking. + * GRBM_GFX_INDEX.INSTANCE_INDEX is used to specify which VCE + * instance's registers are accessed + * (0 for 1st instance, 10 for 2nd instance). + * + *VCE_STATUS + *|UENC|ACPI|AUTO ACTIVE|RB1 |RB0 |RB2 | |FW_LOADED|JOB | + *|----+----+-----------+----+----+----+----------+---------+----| + *|bit8|bit7| bit6 |bit5|bit4|bit3| bit2 | bit1 |bit0| + * + * VCE team suggest use bit 3--bit 6 for busy status check + */ + mutex_lock(&adev->grbm_idx_mutex); + WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); + if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); + } + WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0x10); + if (RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS) & AMDGPU_VCE_STATUS_BUSY_MASK) { + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); + srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); + } + WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0); + mutex_unlock(&adev->grbm_idx_mutex); + + if (srbm_soft_reset) { + adev->vce.srbm_soft_reset = srbm_soft_reset; + return true; + } else { + adev->vce.srbm_soft_reset = 0; + return false; + } +} + +static int vce_v4_0_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + u32 srbm_soft_reset; + + if (!adev->vce.srbm_soft_reset) + return 0; + srbm_soft_reset = adev->vce.srbm_soft_reset; + + if (srbm_soft_reset) { + u32 tmp; + + tmp = RREG32(mmSRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~srbm_soft_reset; + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + /* Wait a little for things to settle down */ + udelay(50); + } + + return 0; +} + +static int vce_v4_0_pre_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->vce.srbm_soft_reset) + return 0; + + mdelay(5); + + return vce_v4_0_suspend(adev); +} + + +static int vce_v4_0_post_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!adev->vce.srbm_soft_reset) + return 0; + + mdelay(5); + + return vce_v4_0_resume(adev); +} + +static void vce_v4_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) +{ + u32 tmp, data; + + tmp = data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL)); + if (override) + data |= VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK; + else + data &= ~VCE_RB_ARB_CTRL__VCE_CGTT_OVERRIDE_MASK; + + if (tmp != data) + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_ARB_CTRL), data); +} + +static void vce_v4_0_set_vce_sw_clock_gating(struct amdgpu_device *adev, + bool gated) +{ + u32 data; + + /* Set Override to disable Clock Gating */ + vce_v4_0_override_vce_clock_gating(adev, true); + + /* This function enables MGCG which is controlled by firmware. + With the clocks in the gated state the core is still + accessible but the firmware will throttle the clocks on the + fly as necessary. + */ + if (gated) { + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B)); + data |= 0x1ff; + data &= ~0xef0000; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING)); + data |= 0x3ff000; + data &= ~0xffc00000; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2)); + data |= 0x2; + data &= ~0x00010000; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING)); + data |= 0x37f; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL)); + data |= VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | + VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | + VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | + 0x8; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data); + } else { + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B)); + data &= ~0x80010; + data |= 0xe70008; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING)); + data |= 0xffc00000; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2)); + data |= 0x10000; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING_2), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING)); + data &= ~0xffc00000; + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), data); + + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL)); + data &= ~(VCE_UENC_DMA_DCLK_CTRL__WRDMCLK_FORCEON_MASK | + VCE_UENC_DMA_DCLK_CTRL__RDDMCLK_FORCEON_MASK | + VCE_UENC_DMA_DCLK_CTRL__REGCLK_FORCEON_MASK | + 0x8); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_DMA_DCLK_CTRL), data); + } + vce_v4_0_override_vce_clock_gating(adev, false); +} + +static void vce_v4_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) +{ + u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL); + + if (enable) + tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK; + else + tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK; + + WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp); +} + +static int vce_v4_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; + + if ((adev->asic_type == CHIP_POLARIS10) || + (adev->asic_type == CHIP_TONGA) || + (adev->asic_type == CHIP_FIJI)) + vce_v4_0_set_bypass_mode(adev, enable); + + if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) + return 0; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < 2; i++) { + /* Program VCE Instance 0 or 1 if not harvested */ + if (adev->vce.harvest_config & (1 << i)) + continue; + + WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, i); + + if (enable) { + /* initialize VCE_CLOCK_GATING_A: Clock ON/OFF delay */ + uint32_t data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A); + data &= ~(0xf | 0xff0); + data |= ((0x0 << 0) | (0x04 << 4)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A, data); + + /* initialize VCE_UENC_CLOCK_GATING: Clock ON/OFF delay */ + data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING); + data &= ~(0xf | 0xff0); + data |= ((0x0 << 0) | (0x04 << 4)); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING, data); + } + + vce_v4_0_set_vce_sw_clock_gating(adev, enable); + } + + WREG32_FIELD(GRBM_GFX_INDEX, VCE_INSTANCE, 0); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static int vce_v4_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* This doesn't actually powergate the VCE block. + * That's done in the dpm code via the SMC. This + * just re-inits the block as necessary. The actual + * gating still happens in the dpm code. We should + * revisit this when there is a cleaner line between + * the smc and the hw blocks + */ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE)) + return 0; + + if (state == AMD_PG_STATE_GATE) + /* XXX do we need a vce_v4_0_stop()? */ + return 0; + else + return vce_v4_0_start(adev); +} +#endif + +static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch) +{ + amdgpu_ring_write(ring, VCE_CMD_IB_VM); + amdgpu_ring_write(ring, vm_id); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void vce_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, VCE_CMD_FENCE); + amdgpu_ring_write(ring, addr); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, VCE_CMD_TRAP); +} + +static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, VCE_CMD_END); +} + +static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vm_id, uint64_t pd_addr) +{ + uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); + unsigned eng = ring->idx; + unsigned i; + + pd_addr = pd_addr | 0x1; /* valid bit */ + /* now only use physical base address of PDE and valid */ + BUG_ON(pd_addr & 0xFFFF00000000003EULL); + + for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; + + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); + } +} + +static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + uint32_t val = 0; + + if (state == AMDGPU_IRQ_STATE_ENABLE) + val |= VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK; + + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), val, + ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + return 0; +} + +static int vce_v4_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: VCE\n"); + + WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_STATUS), + VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK, + ~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK); + + switch (entry->src_data[0]) { + case 0: + case 1: + case 2: + amdgpu_fence_process(&adev->vce.ring[entry->src_data[0]]); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +const struct amd_ip_funcs vce_v4_0_ip_funcs = { + .name = "vce_v4_0", + .early_init = vce_v4_0_early_init, + .late_init = NULL, + .sw_init = vce_v4_0_sw_init, + .sw_fini = vce_v4_0_sw_fini, + .hw_init = vce_v4_0_hw_init, + .hw_fini = vce_v4_0_hw_fini, + .suspend = vce_v4_0_suspend, + .resume = vce_v4_0_resume, + .is_idle = NULL /* vce_v4_0_is_idle */, + .wait_for_idle = NULL /* vce_v4_0_wait_for_idle */, + .check_soft_reset = NULL /* vce_v4_0_check_soft_reset */, + .pre_soft_reset = NULL /* vce_v4_0_pre_soft_reset */, + .soft_reset = NULL /* vce_v4_0_soft_reset */, + .post_soft_reset = NULL /* vce_v4_0_post_soft_reset */, + .set_clockgating_state = vce_v4_0_set_clockgating_state, + .set_powergating_state = NULL /* vce_v4_0_set_powergating_state */, +}; + +static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCE, + .align_mask = 0x3f, + .nop = VCE_CMD_NO_OP, + .support_64bit_ptrs = false, + .get_rptr = vce_v4_0_ring_get_rptr, + .get_wptr = vce_v4_0_ring_get_wptr, + .set_wptr = vce_v4_0_ring_set_wptr, + .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .emit_frame_size = + 17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */ + 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ + 1, /* vce_v4_0_ring_insert_end */ + .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ + .emit_ib = vce_v4_0_ring_emit_ib, + .emit_vm_flush = vce_v4_0_emit_vm_flush, + .emit_fence = vce_v4_0_ring_emit_fence, + .test_ring = amdgpu_vce_ring_test_ring, + .test_ib = amdgpu_vce_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vce_v4_0_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vce_ring_begin_use, + .end_use = amdgpu_vce_ring_end_use, +}; + +static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vce.num_rings; i++) + adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; + DRM_INFO("VCE enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs vce_v4_0_irq_funcs = { + .set = vce_v4_0_set_interrupt_state, + .process = vce_v4_0_process_interrupt, +}; + +static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->vce.irq.num_types = 1; + adev->vce.irq.funcs = &vce_v4_0_irq_funcs; +}; + +const struct amdgpu_ip_block_version vce_v4_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 4, + .minor = 0, + .rev = 0, + .funcs = &vce_v4_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.h b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.h new file mode 100644 index 000000000000..a32beda6a473 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCE_V4_0_H__ +#define __VCE_V4_0_H__ + +extern const struct amdgpu_ip_block_version vce_v4_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c new file mode 100644 index 000000000000..071f56e439bb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -0,0 +1,424 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "drmP.h" +#include "amdgpu.h" +#include "amdgpu_ih.h" +#include "soc15.h" + + +#include "vega10/soc15ip.h" +#include "vega10/OSSSYS/osssys_4_0_offset.h" +#include "vega10/OSSSYS/osssys_4_0_sh_mask.h" + +#include "soc15_common.h" +#include "vega10_ih.h" + + + +static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * vega10_ih_enable_interrupts - Enable the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * + * Enable the interrupt ring buffer (VEGA10). + */ +static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) +{ + u32 ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), ih_rb_cntl); + adev->irq.ih.enabled = true; +} + +/** + * vega10_ih_disable_interrupts - Disable the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * + * Disable the interrupt ring buffer (VEGA10). + */ +static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) +{ + u32 ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), ih_rb_cntl); + /* set rptr, wptr to 0 */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR), 0); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR), 0); + adev->irq.ih.enabled = false; + adev->irq.ih.rptr = 0; +} + +/** + * vega10_ih_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it (VI). + * Called at device load and reume. + * Returns 0 for success, errors for failure. + */ +static int vega10_ih_irq_init(struct amdgpu_device *adev) +{ + int ret = 0; + int rb_bufsz; + u32 ih_rb_cntl, ih_doorbell_rtpr; + u32 tmp; + u64 wptr_off; + + /* disable irqs */ + vega10_ih_disable_interrupts(adev); + + nbio_v6_1_ih_control(adev); + + ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ + if (adev->irq.ih.use_bus_addr) { + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE), adev->irq.ih.rb_dma_addr >> 8); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI), ((u64)adev->irq.ih.rb_dma_addr >> 40) & 0xff); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 1); + } else { + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE), adev->irq.ih.gpu_addr >> 8); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI), (adev->irq.ih.gpu_addr >> 40) & 0xff); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SPACE, 4); + } + rb_bufsz = order_base_2(adev->irq.ih.ring_size / 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register value is written to memory */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + if (adev->irq.msi_enabled) + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, 1); + + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), ih_rb_cntl); + + /* set the writeback address whether it's enabled or not */ + if (adev->irq.ih.use_bus_addr) + wptr_off = adev->irq.ih.rb_dma_addr + (adev->irq.ih.wptr_offs * 4); + else + wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO), lower_32_bits(wptr_off)); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI), upper_32_bits(wptr_off) & 0xFF); + + /* set rptr, wptr to 0 */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR), 0); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR), 0); + + ih_doorbell_rtpr = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR)); + if (adev->irq.ih.use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, + OFFSET, adev->irq.ih.doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, IH_DOORBELL_RPTR, + ENABLE, 0); + } + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR), ih_doorbell_rtpr); + nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); + + tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL)); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL), tmp); + + tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_INT_FLOOD_CNTL)); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_INT_FLOOD_CNTL), tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + vega10_ih_enable_interrupts(adev); + + return ret; +} + +/** + * vega10_ih_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw (VEGA10). + */ +static void vega10_ih_irq_disable(struct amdgpu_device *adev) +{ + vega10_ih_disable_interrupts(adev); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * vega10_ih_get_wptr - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer (VEGA10). Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) +{ + u32 wptr, tmp; + + if (adev->irq.ih.use_bus_addr) + wptr = le32_to_cpu(adev->irq.ih.ring[adev->irq.ih.wptr_offs]); + else + wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]); + + if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catchup. + */ + tmp = (wptr + 32) & adev->irq.ih.ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, adev->irq.ih.rptr, tmp); + adev->irq.ih.rptr = tmp; + + tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL), tmp); + } + return (wptr & adev->irq.ih.ptr_mask); +} + +/** + * vega10_ih_decode_iv - decode an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Decodes the interrupt vector at the current rptr + * position and also advance the position. + */ +static void vega10_ih_decode_iv(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + /* wptr/rptr are in bytes! */ + u32 ring_index = adev->irq.ih.rptr >> 2; + uint32_t dw[8]; + + dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); + dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); + dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); + dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); + dw[4] = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]); + dw[5] = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]); + dw[6] = le32_to_cpu(adev->irq.ih.ring[ring_index + 6]); + dw[7] = le32_to_cpu(adev->irq.ih.ring[ring_index + 7]); + + entry->client_id = dw[0] & 0xff; + entry->src_id = (dw[0] >> 8) & 0xff; + entry->ring_id = (dw[0] >> 16) & 0xff; + entry->vm_id = (dw[0] >> 24) & 0xf; + entry->vm_id_src = (dw[0] >> 31); + entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); + entry->timestamp_src = dw[2] >> 31; + entry->pas_id = dw[3] & 0xffff; + entry->pasid_src = dw[3] >> 31; + entry->src_data[0] = dw[4]; + entry->src_data[1] = dw[5]; + entry->src_data[2] = dw[6]; + entry->src_data[3] = dw[7]; + + + /* wptr/rptr are in bytes! */ + adev->irq.ih.rptr += 32; +} + +/** + * vega10_ih_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * + * Set the IH ring buffer rptr. + */ +static void vega10_ih_set_rptr(struct amdgpu_device *adev) +{ + if (adev->irq.ih.use_doorbell) { + /* XXX check if swapping is necessary on BE */ + if (adev->irq.ih.use_bus_addr) + adev->irq.ih.ring[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr; + else + adev->wb.wb[adev->irq.ih.rptr_offs] = adev->irq.ih.rptr; + WDOORBELL32(adev->irq.ih.doorbell_index, adev->irq.ih.rptr); + } else { + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR), adev->irq.ih.rptr); + } +} + +static int vega10_ih_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega10_ih_set_interrupt_funcs(adev); + return 0; +} + +static int vega10_ih_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_ih_ring_init(adev, 256 * 1024, true); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int vega10_ih_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini(adev); + amdgpu_ih_ring_fini(adev); + + return 0; +} + +static int vega10_ih_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vega10_ih_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int vega10_ih_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega10_ih_irq_disable(adev); + + return 0; +} + +static int vega10_ih_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return vega10_ih_hw_fini(adev); +} + +static int vega10_ih_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return vega10_ih_hw_init(adev); +} + +static bool vega10_ih_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int vega10_ih_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int vega10_ih_soft_reset(void *handle) +{ + /* todo */ + + return 0; +} + +static int vega10_ih_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int vega10_ih_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs vega10_ih_ip_funcs = { + .name = "vega10_ih", + .early_init = vega10_ih_early_init, + .late_init = NULL, + .sw_init = vega10_ih_sw_init, + .sw_fini = vega10_ih_sw_fini, + .hw_init = vega10_ih_hw_init, + .hw_fini = vega10_ih_hw_fini, + .suspend = vega10_ih_suspend, + .resume = vega10_ih_resume, + .is_idle = vega10_ih_is_idle, + .wait_for_idle = vega10_ih_wait_for_idle, + .soft_reset = vega10_ih_soft_reset, + .set_clockgating_state = vega10_ih_set_clockgating_state, + .set_powergating_state = vega10_ih_set_powergating_state, +}; + +static const struct amdgpu_ih_funcs vega10_ih_funcs = { + .get_wptr = vega10_ih_get_wptr, + .decode_iv = vega10_ih_decode_iv, + .set_rptr = vega10_ih_set_rptr +}; + +static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev) +{ + if (adev->irq.ih_funcs == NULL) + adev->irq.ih_funcs = &vega10_ih_funcs; +} + +const struct amdgpu_ip_block_version vega10_ih_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 4, + .minor = 0, + .rev = 0, + .funcs = &vega10_ih_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.h b/drivers/gpu/drm/amd/amdgpu/vega10_ih.h new file mode 100644 index 000000000000..82edd28b9972 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.h @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VEGA10_IH_H__ +#define __VEGA10_IH_H__ + +extern const struct amd_ip_funcs vega10_ih_ip_funcs; +extern const struct amdgpu_ip_block_version vega10_ih_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h new file mode 100644 index 000000000000..8de4ccce5e38 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h @@ -0,0 +1,3335 @@ +/* + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VEGA10_SDMA_PKT_OPEN_H_ +#define __VEGA10_SDMA_PKT_OPEN_H_ + +#define SDMA_OP_NOP 0 +#define SDMA_OP_COPY 1 +#define SDMA_OP_WRITE 2 +#define SDMA_OP_INDIRECT 4 +#define SDMA_OP_FENCE 5 +#define SDMA_OP_TRAP 6 +#define SDMA_OP_SEM 7 +#define SDMA_OP_POLL_REGMEM 8 +#define SDMA_OP_COND_EXE 9 +#define SDMA_OP_ATOMIC 10 +#define SDMA_OP_CONST_FILL 11 +#define SDMA_OP_PTEPDE 12 +#define SDMA_OP_TIMESTAMP 13 +#define SDMA_OP_SRBM_WRITE 14 +#define SDMA_OP_PRE_EXE 15 +#define SDMA_OP_DUMMY_TRAP 16 +#define SDMA_SUBOP_TIMESTAMP_SET 0 +#define SDMA_SUBOP_TIMESTAMP_GET 1 +#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL 2 +#define SDMA_SUBOP_COPY_LINEAR 0 +#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND 4 +#define SDMA_SUBOP_COPY_TILED 1 +#define SDMA_SUBOP_COPY_TILED_SUB_WIND 5 +#define SDMA_SUBOP_COPY_T2T_SUB_WIND 6 +#define SDMA_SUBOP_COPY_SOA 3 +#define SDMA_SUBOP_COPY_DIRTY_PAGE 7 +#define SDMA_SUBOP_COPY_LINEAR_PHY 8 +#define SDMA_SUBOP_WRITE_LINEAR 0 +#define SDMA_SUBOP_WRITE_TILED 1 +#define SDMA_SUBOP_PTEPDE_GEN 0 +#define SDMA_SUBOP_PTEPDE_COPY 1 +#define SDMA_SUBOP_PTEPDE_RMW 2 +#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS 3 +#define SDMA_SUBOP_DATA_FILL_MULTI 1 +#define SDMA_SUBOP_POLL_REG_WRITE_MEM 1 +#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM 2 +#define SDMA_SUBOP_POLL_MEM_VERIFY 3 +#define HEADER_AGENT_DISPATCH 4 +#define HEADER_BARRIER 5 +#define SDMA_OP_AQL_COPY 0 +#define SDMA_OP_AQL_BARRIER_OR 0 + +/*define for op field*/ +#define SDMA_PKT_HEADER_op_offset 0 +#define SDMA_PKT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_HEADER_op_shift 0 +#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_HEADER_sub_op_offset 0 +#define SDMA_PKT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_HEADER_sub_op_shift 8 +#define SDMA_PKT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_HEADER_sub_op_mask) << SDMA_PKT_HEADER_sub_op_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_HEADER_tmz_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_DIRTY_PAGE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_tmz_shift) + +/*define for all field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_offset 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_HEADER_ALL(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_mask) << SDMA_PKT_COPY_DIRTY_PAGE_HEADER_all_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_offset 1 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_mask) << SDMA_PKT_COPY_DIRTY_PAGE_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_sys_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sw_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_DIRTY_PAGE_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_DIRTY_PAGE_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_PHYSICAL_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_HEADER_tmz_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sw_shift) + +/*define for dst_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift 19 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gcc_shift) + +/*define for dst_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift 20 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_sys_shift) + +/*define for dst_log field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift 21 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_LOG(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_log_shift) + +/*define for dst_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift 22 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_snoop_shift) + +/*define for dst_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift 23 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_DST_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_dst_gpa_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sw_shift) + +/*define for src_gcc field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift 27 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GCC(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gcc_shift) + +/*define for src_sys field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift 28 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SYS(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_sys_shift) + +/*define for src_snoop field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift 30 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_SNOOP(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_snoop_shift) + +/*define for src_gpa field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_offset 2 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask 0x00000001 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift 31 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_SRC_GPA(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_PARAMETER_src_gpa_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 5 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 6 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_PHYSICAL_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_BROADCAST_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_tmz_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_HEADER_broadcast_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_offset 1 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST2_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst2_sw_shift) + +/*define for dst1_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift 16 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_DST1_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_dst1_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_offset 2 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 3 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 4 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST1_ADDR_LO word*/ +/*define for dst1_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_offset 5 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_DST1_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_LO_dst1_addr_31_0_shift) + +/*define for DST1_ADDR_HI word*/ +/*define for dst1_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_offset 6 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_DST1_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST1_ADDR_HI_dst1_addr_63_32_shift) + +/*define for DST2_ADDR_LO word*/ +/*define for dst2_addr_31_0 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_offset 7 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_DST2_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_LO_dst2_addr_31_0_shift) + +/*define for DST2_ADDR_HI word*/ +/*define for dst2_addr_63_32 field*/ +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_offset 8 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift 0 +#define SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_DST2_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_mask) << SDMA_PKT_COPY_BROADCAST_LINEAR_DST2_ADDR_HI_dst2_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_COPY_LINEAR_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_tmz_shift) + +/*define for elementsize field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_offset 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask 0x00000007 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift 29 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_ELEMENTSIZE(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_HEADER_elementsize_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_z_shift) + +/*define for src_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_offset 4 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_SRC_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_4_src_pitch_shift) + +/*define for DW_5 word*/ +/*define for src_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_offset 5 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_SRC_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_5_src_slice_pitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_offset 6 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_offset 7 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_8 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_offset 8 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_DST_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_8_dst_y_shift) + +/*define for DW_9 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_z_shift) + +/*define for dst_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_offset 9 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift 13 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_DST_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_9_dst_pitch_shift) + +/*define for DW_10 word*/ +/*define for dst_slice_pitch field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_offset 10 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_DST_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_10_dst_slice_pitch_shift) + +/*define for DW_11 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_X(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_offset 11 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_RECT_Y(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_11_rect_y_shift) + +/*define for DW_12 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift 0 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_RECT_Z(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift 16 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_DST_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_offset 12 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift 24 +#define SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_SRC_SW(x) (((x) & SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_mask) << SDMA_PKT_COPY_LINEAR_SUBWIN_DW_12_src_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_op_mask) << SDMA_PKT_COPY_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_encrypt_mask) << SDMA_PKT_COPY_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_HEADER_tmz_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_HEADER_mip_max_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_HEADER_mip_max_shift 20 +#define SDMA_PKT_COPY_TILED_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_mip_max_mask) << SDMA_PKT_COPY_TILED_HEADER_mip_max_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_DW_3_width_offset 3 +#define SDMA_PKT_COPY_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_3_width_shift 0 +#define SDMA_PKT_COPY_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_3_width_mask) << SDMA_PKT_COPY_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_DW_4_height_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_4_height_shift 0 +#define SDMA_PKT_COPY_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_height_mask) << SDMA_PKT_COPY_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_COPY_TILED_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_COPY_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_DW_4_depth_mask) << SDMA_PKT_COPY_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_element_size_mask) << SDMA_PKT_COPY_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_dimension_mask) << SDMA_PKT_COPY_TILED_DW_5_dimension_shift) + +/*define for epitch field*/ +#define SDMA_PKT_COPY_TILED_DW_5_epitch_offset 5 +#define SDMA_PKT_COPY_TILED_DW_5_epitch_mask 0x0000FFFF +#define SDMA_PKT_COPY_TILED_DW_5_epitch_shift 16 +#define SDMA_PKT_COPY_TILED_DW_5_EPITCH(x) (((x) & SDMA_PKT_COPY_TILED_DW_5_epitch_mask) << SDMA_PKT_COPY_TILED_DW_5_epitch_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_TILED_DW_6_x_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_x_shift 0 +#define SDMA_PKT_COPY_TILED_DW_6_X(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_x_mask) << SDMA_PKT_COPY_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_TILED_DW_6_y_offset 6 +#define SDMA_PKT_COPY_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_DW_6_y_shift 16 +#define SDMA_PKT_COPY_TILED_DW_6_Y(x) (((x) & SDMA_PKT_COPY_TILED_DW_6_y_mask) << SDMA_PKT_COPY_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_TILED_DW_7_z_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_DW_7_z_shift 0 +#define SDMA_PKT_COPY_TILED_DW_7_Z(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_z_mask) << SDMA_PKT_COPY_TILED_DW_7_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_DW_7_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_linear_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_offset 7 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_DW_7_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_DW_7_tile_sw_mask) << SDMA_PKT_COPY_TILED_DW_7_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_offset 8 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_offset 9 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_TILED_COUNT_count_offset 12 +#define SDMA_PKT_COPY_TILED_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_COPY_TILED_COUNT_count_shift 0 +#define SDMA_PKT_COPY_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_TILED_COUNT_count_mask) << SDMA_PKT_COPY_TILED_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_L2T_BROADCAST packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_tmz_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_shift 20 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_mip_max_shift) + +/*define for videocopy field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift 26 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_VIDEOCOPY(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_videocopy_shift) + +/*define for broadcast field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_offset 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask 0x00000001 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift 27 +#define SDMA_PKT_COPY_L2T_BROADCAST_HEADER_BROADCAST(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_mask) << SDMA_PKT_COPY_L2T_BROADCAST_HEADER_broadcast_shift) + +/*define for TILED_ADDR_LO_0 word*/ +/*define for tiled_addr0_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_offset 1 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_TILED_ADDR0_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_0_tiled_addr0_31_0_shift) + +/*define for TILED_ADDR_HI_0 word*/ +/*define for tiled_addr0_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_offset 2 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_TILED_ADDR0_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_0_tiled_addr0_63_32_shift) + +/*define for TILED_ADDR_LO_1 word*/ +/*define for tiled_addr1_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_offset 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_TILED_ADDR1_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_LO_1_tiled_addr1_31_0_shift) + +/*define for TILED_ADDR_HI_1 word*/ +/*define for tiled_addr1_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_offset 4 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_TILED_ADDR1_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_TILED_ADDR_HI_1_tiled_addr1_63_32_shift) + +/*define for DW_5 word*/ +/*define for width field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_offset 5 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_5_WIDTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_5_width_shift) + +/*define for DW_6 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_HEIGHT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_offset 6 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask 0x000007FF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_6_DEPTH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_6_depth_shift) + +/*define for DW_7 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_DIMENSION(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_dimension_shift) + +/*define for epitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_offset 7 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_mask 0x0000FFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_7_EPITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_7_epitch_shift) + +/*define for DW_8 word*/ +/*define for x field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_X(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_x_shift) + +/*define for y field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_offset 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask 0x00003FFF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_8_Y(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_8_y_shift) + +/*define for DW_9 word*/ +/*define for z field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_offset 9 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask 0x000007FF +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_9_Z(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_9_z_shift) + +/*define for DW_10 word*/ +/*define for dst2_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift 8 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_DST2_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_dst2_sw_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift 16 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_offset 10 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift 24 +#define SDMA_PKT_COPY_L2T_BROADCAST_DW_10_TILE_SW(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_mask) << SDMA_PKT_COPY_L2T_BROADCAST_DW_10_tile_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_offset 11 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_offset 12 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for LINEAR_PITCH word*/ +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_offset 13 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask 0x0007FFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_PITCH_linear_pitch_shift) + +/*define for LINEAR_SLICE_PITCH word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_offset 14 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_mask) << SDMA_PKT_COPY_L2T_BROADCAST_LINEAR_SLICE_PITCH_linear_slice_pitch_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_offset 15 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift 0 +#define SDMA_PKT_COPY_L2T_BROADCAST_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_mask) << SDMA_PKT_COPY_L2T_BROADCAST_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_COPY_T2T packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_op_shift 0 +#define SDMA_PKT_COPY_T2T_HEADER_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_op_mask) << SDMA_PKT_COPY_T2T_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_T2T_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_T2T_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_sub_op_mask) << SDMA_PKT_COPY_T2T_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_T2T_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_T2T_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_T2T_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_tmz_mask) << SDMA_PKT_COPY_T2T_HEADER_tmz_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_T2T_HEADER_mip_max_offset 0 +#define SDMA_PKT_COPY_T2T_HEADER_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_T2T_HEADER_mip_max_shift 20 +#define SDMA_PKT_COPY_T2T_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_T2T_HEADER_mip_max_mask) << SDMA_PKT_COPY_T2T_HEADER_mip_max_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_COPY_T2T_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for src_x field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_x_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_x_mask) << SDMA_PKT_COPY_T2T_DW_3_src_x_shift) + +/*define for src_y field*/ +#define SDMA_PKT_COPY_T2T_DW_3_src_y_offset 3 +#define SDMA_PKT_COPY_T2T_DW_3_src_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_3_src_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_3_SRC_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_3_src_y_mask) << SDMA_PKT_COPY_T2T_DW_3_src_y_shift) + +/*define for DW_4 word*/ +/*define for src_z field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_z_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_DW_4_src_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_z_mask) << SDMA_PKT_COPY_T2T_DW_4_src_z_shift) + +/*define for src_width field*/ +#define SDMA_PKT_COPY_T2T_DW_4_src_width_offset 4 +#define SDMA_PKT_COPY_T2T_DW_4_src_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_4_src_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_4_SRC_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_4_src_width_mask) << SDMA_PKT_COPY_T2T_DW_4_src_width_shift) + +/*define for DW_5 word*/ +/*define for src_height field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_height_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_5_src_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_height_mask) << SDMA_PKT_COPY_T2T_DW_5_src_height_shift) + +/*define for src_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_offset 5 +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_DW_5_src_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_5_SRC_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_5_src_depth_mask) << SDMA_PKT_COPY_T2T_DW_5_src_depth_shift) + +/*define for DW_6 word*/ +/*define for src_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_element_size_mask) << SDMA_PKT_COPY_T2T_DW_6_src_element_size_shift) + +/*define for src_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_6_src_swizzle_mode_shift) + +/*define for src_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_dimension_mask) << SDMA_PKT_COPY_T2T_DW_6_src_dimension_shift) + +/*define for src_epitch field*/ +#define SDMA_PKT_COPY_T2T_DW_6_src_epitch_offset 6 +#define SDMA_PKT_COPY_T2T_DW_6_src_epitch_mask 0x0000FFFF +#define SDMA_PKT_COPY_T2T_DW_6_src_epitch_shift 16 +#define SDMA_PKT_COPY_T2T_DW_6_SRC_EPITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_6_src_epitch_mask) << SDMA_PKT_COPY_T2T_DW_6_src_epitch_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_offset 7 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_offset 8 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_COPY_T2T_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_COPY_T2T_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for dst_x field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_9_DST_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_x_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_x_shift) + +/*define for dst_y field*/ +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_offset 9 +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_9_dst_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_9_DST_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_9_dst_y_mask) << SDMA_PKT_COPY_T2T_DW_9_dst_y_shift) + +/*define for DW_10 word*/ +/*define for dst_z field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_DW_10_dst_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_10_DST_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_z_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_z_shift) + +/*define for dst_width field*/ +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_offset 10 +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_10_dst_width_shift 16 +#define SDMA_PKT_COPY_T2T_DW_10_DST_WIDTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_10_dst_width_mask) << SDMA_PKT_COPY_T2T_DW_10_dst_width_shift) + +/*define for DW_11 word*/ +/*define for dst_height field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_11_dst_height_shift 0 +#define SDMA_PKT_COPY_T2T_DW_11_DST_HEIGHT(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_height_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_height_shift) + +/*define for dst_depth field*/ +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_offset 11 +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift 16 +#define SDMA_PKT_COPY_T2T_DW_11_DST_DEPTH(x) (((x) & SDMA_PKT_COPY_T2T_DW_11_dst_depth_mask) << SDMA_PKT_COPY_T2T_DW_11_dst_depth_shift) + +/*define for DW_12 word*/ +/*define for dst_element_size field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift 0 +#define SDMA_PKT_COPY_T2T_DW_12_DST_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_element_size_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_element_size_shift) + +/*define for dst_swizzle_mode field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_T2T_DW_12_DST_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_swizzle_mode_shift) + +/*define for dst_dimension field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift 9 +#define SDMA_PKT_COPY_T2T_DW_12_DST_DIMENSION(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_dimension_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_dimension_shift) + +/*define for dst_epitch field*/ +#define SDMA_PKT_COPY_T2T_DW_12_dst_epitch_offset 12 +#define SDMA_PKT_COPY_T2T_DW_12_dst_epitch_mask 0x0000FFFF +#define SDMA_PKT_COPY_T2T_DW_12_dst_epitch_shift 16 +#define SDMA_PKT_COPY_T2T_DW_12_DST_EPITCH(x) (((x) & SDMA_PKT_COPY_T2T_DW_12_dst_epitch_mask) << SDMA_PKT_COPY_T2T_DW_12_dst_epitch_shift) + +/*define for DW_13 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_x_shift 0 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_X(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_x_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_offset 13 +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_T2T_DW_13_rect_y_shift 16 +#define SDMA_PKT_COPY_T2T_DW_13_RECT_Y(x) (((x) & SDMA_PKT_COPY_T2T_DW_13_rect_y_mask) << SDMA_PKT_COPY_T2T_DW_13_rect_y_shift) + +/*define for DW_14 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_T2T_DW_14_rect_z_shift 0 +#define SDMA_PKT_COPY_T2T_DW_14_RECT_Z(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_rect_z_mask) << SDMA_PKT_COPY_T2T_DW_14_rect_z_shift) + +/*define for dst_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift 16 +#define SDMA_PKT_COPY_T2T_DW_14_DST_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_dst_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_offset 14 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_mask 0x00000003 +#define SDMA_PKT_COPY_T2T_DW_14_src_sw_shift 24 +#define SDMA_PKT_COPY_T2T_DW_14_SRC_SW(x) (((x) & SDMA_PKT_COPY_T2T_DW_14_src_sw_mask) << SDMA_PKT_COPY_T2T_DW_14_src_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_TILED_SUBWIN packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_tmz_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_shift 20 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_max_shift) + +/*define for mip_id field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_mask 0x0000000F +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_MIP_ID(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_mip_id_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_TILED_SUBWIN_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_mask) << SDMA_PKT_COPY_TILED_SUBWIN_HEADER_detile_shift) + +/*define for TILED_ADDR_LO word*/ +/*define for tiled_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_offset 1 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_TILED_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_LO_tiled_addr_31_0_shift) + +/*define for TILED_ADDR_HI word*/ +/*define for tiled_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_offset 2 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_TILED_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_TILED_ADDR_HI_tiled_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for tiled_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_x_shift) + +/*define for tiled_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_offset 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_3_TILED_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_3_tiled_y_shift) + +/*define for DW_4 word*/ +/*define for tiled_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_TILED_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_tiled_z_shift) + +/*define for width field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_offset 4 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_4_WIDTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_4_width_shift) + +/*define for DW_5 word*/ +/*define for height field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_HEIGHT(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_offset 5 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_5_DEPTH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_5_depth_shift) + +/*define for DW_6 word*/ +/*define for element_size field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask 0x00000007 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_ELEMENT_SIZE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift 3 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_SWIZZLE_MODE(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_DIMENSION(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_dimension_shift) + +/*define for epitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_offset 6 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_mask 0x0000FFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_6_EPITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_6_epitch_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_offset 7 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_offset 8 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_TILED_SUBWIN_LINEAR_ADDR_HI_linear_addr_63_32_shift) + +/*define for DW_9 word*/ +/*define for linear_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_x_shift) + +/*define for linear_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_offset 9 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_9_LINEAR_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_9_linear_y_shift) + +/*define for DW_10 word*/ +/*define for linear_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_z_shift) + +/*define for linear_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_offset 10 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_10_LINEAR_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_10_linear_pitch_shift) + +/*define for DW_11 word*/ +/*define for linear_slice_pitch field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_offset 11 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask 0x0FFFFFFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_11_LINEAR_SLICE_PITCH(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_11_linear_slice_pitch_shift) + +/*define for DW_12 word*/ +/*define for rect_x field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_X(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_x_shift) + +/*define for rect_y field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_offset 12 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask 0x00003FFF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_12_RECT_Y(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_12_rect_y_shift) + +/*define for DW_13 word*/ +/*define for rect_z field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask 0x000007FF +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift 0 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_RECT_Z(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_rect_z_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift 16 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_linear_sw_shift) + +/*define for tile_sw field*/ +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_offset 13 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask 0x00000003 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift 24 +#define SDMA_PKT_COPY_TILED_SUBWIN_DW_13_TILE_SW(x) (((x) & SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_mask) << SDMA_PKT_COPY_TILED_SUBWIN_DW_13_tile_sw_shift) + + +/* +** Definitions for SDMA_PKT_COPY_STRUCT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_op_shift 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift 8 +#define SDMA_PKT_COPY_STRUCT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_sub_op_mask) << SDMA_PKT_COPY_STRUCT_HEADER_sub_op_shift) + +/*define for tmz field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift 18 +#define SDMA_PKT_COPY_STRUCT_HEADER_TMZ(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_tmz_mask) << SDMA_PKT_COPY_STRUCT_HEADER_tmz_shift) + +/*define for detile field*/ +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_offset 0 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_mask 0x00000001 +#define SDMA_PKT_COPY_STRUCT_HEADER_detile_shift 31 +#define SDMA_PKT_COPY_STRUCT_HEADER_DETILE(x) (((x) & SDMA_PKT_COPY_STRUCT_HEADER_detile_mask) << SDMA_PKT_COPY_STRUCT_HEADER_detile_shift) + +/*define for SB_ADDR_LO word*/ +/*define for sb_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_offset 1 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_SB_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_LO_sb_addr_31_0_shift) + +/*define for SB_ADDR_HI word*/ +/*define for sb_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_offset 2 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_SB_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_SB_ADDR_HI_sb_addr_63_32_shift) + +/*define for START_INDEX word*/ +/*define for start_index field*/ +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_offset 3 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift 0 +#define SDMA_PKT_COPY_STRUCT_START_INDEX_START_INDEX(x) (((x) & SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_mask) << SDMA_PKT_COPY_STRUCT_START_INDEX_start_index_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_COPY_STRUCT_COUNT_count_offset 4 +#define SDMA_PKT_COPY_STRUCT_COUNT_count_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_COUNT_count_shift 0 +#define SDMA_PKT_COPY_STRUCT_COUNT_COUNT(x) (((x) & SDMA_PKT_COPY_STRUCT_COUNT_count_mask) << SDMA_PKT_COPY_STRUCT_COUNT_count_shift) + +/*define for DW_5 word*/ +/*define for stride field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_mask 0x000007FF +#define SDMA_PKT_COPY_STRUCT_DW_5_stride_shift 0 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRIDE(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_stride_mask) << SDMA_PKT_COPY_STRUCT_DW_5_stride_shift) + +/*define for linear_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift 16 +#define SDMA_PKT_COPY_STRUCT_DW_5_LINEAR_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_linear_sw_shift) + +/*define for struct_sw field*/ +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_offset 5 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask 0x00000003 +#define SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift 24 +#define SDMA_PKT_COPY_STRUCT_DW_5_STRUCT_SW(x) (((x) & SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_mask) << SDMA_PKT_COPY_STRUCT_DW_5_struct_sw_shift) + +/*define for LINEAR_ADDR_LO word*/ +/*define for linear_addr_31_0 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_offset 6 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_LINEAR_ADDR_31_0(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_LO_linear_addr_31_0_shift) + +/*define for LINEAR_ADDR_HI word*/ +/*define for linear_addr_63_32 field*/ +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_offset 7 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift 0 +#define SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_LINEAR_ADDR_63_32(x) (((x) & SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_mask) << SDMA_PKT_COPY_STRUCT_LINEAR_ADDR_HI_linear_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_UNTILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_UNTILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_UNTILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift 18 +#define SDMA_PKT_WRITE_UNTILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_UNTILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_UNTILED_HEADER_tmz_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_UNTILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_UNTILED_DW_3_count_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_count_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_count_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_offset 3 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift 24 +#define SDMA_PKT_WRITE_UNTILED_DW_3_SW(x) (((x) & SDMA_PKT_WRITE_UNTILED_DW_3_sw_mask) << SDMA_PKT_WRITE_UNTILED_DW_3_sw_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_offset 4 +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_UNTILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_UNTILED_DATA0_data0_mask) << SDMA_PKT_WRITE_UNTILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_TILED packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_TILED_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_TILED_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_sub_op_mask) << SDMA_PKT_WRITE_TILED_HEADER_sub_op_shift) + +/*define for encrypt field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift 16 +#define SDMA_PKT_WRITE_TILED_HEADER_ENCRYPT(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_encrypt_mask) << SDMA_PKT_WRITE_TILED_HEADER_encrypt_shift) + +/*define for tmz field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_WRITE_TILED_HEADER_tmz_shift 18 +#define SDMA_PKT_WRITE_TILED_HEADER_TMZ(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_tmz_mask) << SDMA_PKT_WRITE_TILED_HEADER_tmz_shift) + +/*define for mip_max field*/ +#define SDMA_PKT_WRITE_TILED_HEADER_mip_max_offset 0 +#define SDMA_PKT_WRITE_TILED_HEADER_mip_max_mask 0x0000000F +#define SDMA_PKT_WRITE_TILED_HEADER_mip_max_shift 20 +#define SDMA_PKT_WRITE_TILED_HEADER_MIP_MAX(x) (((x) & SDMA_PKT_WRITE_TILED_HEADER_mip_max_mask) << SDMA_PKT_WRITE_TILED_HEADER_mip_max_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_TILED_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_TILED_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DW_3 word*/ +/*define for width field*/ +#define SDMA_PKT_WRITE_TILED_DW_3_width_offset 3 +#define SDMA_PKT_WRITE_TILED_DW_3_width_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_3_width_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_3_WIDTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_3_width_mask) << SDMA_PKT_WRITE_TILED_DW_3_width_shift) + +/*define for DW_4 word*/ +/*define for height field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_height_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_height_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_4_height_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_4_HEIGHT(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_height_mask) << SDMA_PKT_WRITE_TILED_DW_4_height_shift) + +/*define for depth field*/ +#define SDMA_PKT_WRITE_TILED_DW_4_depth_offset 4 +#define SDMA_PKT_WRITE_TILED_DW_4_depth_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_DW_4_depth_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_4_DEPTH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_4_depth_mask) << SDMA_PKT_WRITE_TILED_DW_4_depth_shift) + +/*define for DW_5 word*/ +/*define for element_size field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_mask 0x00000007 +#define SDMA_PKT_WRITE_TILED_DW_5_element_size_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_5_ELEMENT_SIZE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_element_size_mask) << SDMA_PKT_WRITE_TILED_DW_5_element_size_shift) + +/*define for swizzle_mode field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask 0x0000001F +#define SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift 3 +#define SDMA_PKT_WRITE_TILED_DW_5_SWIZZLE_MODE(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_mask) << SDMA_PKT_WRITE_TILED_DW_5_swizzle_mode_shift) + +/*define for dimension field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_5_dimension_shift 9 +#define SDMA_PKT_WRITE_TILED_DW_5_DIMENSION(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_dimension_mask) << SDMA_PKT_WRITE_TILED_DW_5_dimension_shift) + +/*define for epitch field*/ +#define SDMA_PKT_WRITE_TILED_DW_5_epitch_offset 5 +#define SDMA_PKT_WRITE_TILED_DW_5_epitch_mask 0x0000FFFF +#define SDMA_PKT_WRITE_TILED_DW_5_epitch_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_5_EPITCH(x) (((x) & SDMA_PKT_WRITE_TILED_DW_5_epitch_mask) << SDMA_PKT_WRITE_TILED_DW_5_epitch_shift) + +/*define for DW_6 word*/ +/*define for x field*/ +#define SDMA_PKT_WRITE_TILED_DW_6_x_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_x_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_x_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_6_X(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_x_mask) << SDMA_PKT_WRITE_TILED_DW_6_x_shift) + +/*define for y field*/ +#define SDMA_PKT_WRITE_TILED_DW_6_y_offset 6 +#define SDMA_PKT_WRITE_TILED_DW_6_y_mask 0x00003FFF +#define SDMA_PKT_WRITE_TILED_DW_6_y_shift 16 +#define SDMA_PKT_WRITE_TILED_DW_6_Y(x) (((x) & SDMA_PKT_WRITE_TILED_DW_6_y_mask) << SDMA_PKT_WRITE_TILED_DW_6_y_shift) + +/*define for DW_7 word*/ +/*define for z field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_z_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_z_mask 0x000007FF +#define SDMA_PKT_WRITE_TILED_DW_7_z_shift 0 +#define SDMA_PKT_WRITE_TILED_DW_7_Z(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_z_mask) << SDMA_PKT_WRITE_TILED_DW_7_z_shift) + +/*define for sw field*/ +#define SDMA_PKT_WRITE_TILED_DW_7_sw_offset 7 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_mask 0x00000003 +#define SDMA_PKT_WRITE_TILED_DW_7_sw_shift 24 +#define SDMA_PKT_WRITE_TILED_DW_7_SW(x) (((x) & SDMA_PKT_WRITE_TILED_DW_7_sw_mask) << SDMA_PKT_WRITE_TILED_DW_7_sw_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_TILED_COUNT_count_offset 8 +#define SDMA_PKT_WRITE_TILED_COUNT_count_mask 0x000FFFFF +#define SDMA_PKT_WRITE_TILED_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_TILED_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_TILED_COUNT_count_mask) << SDMA_PKT_WRITE_TILED_COUNT_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_WRITE_TILED_DATA0_data0_offset 9 +#define SDMA_PKT_WRITE_TILED_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_TILED_DATA0_data0_shift 0 +#define SDMA_PKT_WRITE_TILED_DATA0_DATA0(x) (((x) & SDMA_PKT_WRITE_TILED_DATA0_data0_mask) << SDMA_PKT_WRITE_TILED_DATA0_data0_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_sub_op_shift) + +/*define for ptepde_op field*/ +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift 31 +#define SDMA_PKT_PTEPDE_COPY_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_HEADER_ptepde_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_offset 5 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_offset 6 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_PTEPDE_COPY_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_mask) << SDMA_PKT_PTEPDE_COPY_MASK_DW1_mask_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_offset 7 +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_PTEPDE_COPY_COUNT_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_COUNT_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_COUNT_count_mask) << SDMA_PKT_PTEPDE_COPY_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_COPY_BACKWARDS packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_sub_op_shift) + +/*define for pte_size field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask 0x00000003 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift 28 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTE_SIZE(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_pte_size_shift) + +/*define for direction field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift 30 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_DIRECTION(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_direction_shift) + +/*define for ptepde_op field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_offset 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask 0x00000001 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift 31 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_PTEPDE_OP(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_HEADER_ptepde_op_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_BIT_FOR_DW word*/ +/*define for mask_first_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_FIRST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_first_xfer_shift) + +/*define for mask_last_xfer field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_offset 5 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask 0x000000FF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift 8 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_MASK_LAST_XFER(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_MASK_BIT_FOR_DW_mask_last_xfer_shift) + +/*define for COUNT_IN_32B_XFER word*/ +/*define for count field*/ +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_offset 6 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask 0x0001FFFF +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift 0 +#define SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_COUNT(x) (((x) & SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_mask) << SDMA_PKT_PTEPDE_COPY_BACKWARDS_COUNT_IN_32B_XFER_count_shift) + + +/* +** Definitions for SDMA_PKT_PTEPDE_RMW packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_op_shift 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift 8 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sub_op_shift) + +/*define for gcc field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift 19 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GCC(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gcc_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gcc_shift) + +/*define for sys field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift 20 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SYS(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_sys_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_sys_shift) + +/*define for snp field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift 22 +#define SDMA_PKT_PTEPDE_RMW_HEADER_SNP(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_snp_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_snp_shift) + +/*define for gpa field*/ +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_offset 0 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask 0x00000001 +#define SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift 23 +#define SDMA_PKT_PTEPDE_RMW_HEADER_GPA(x) (((x) & SDMA_PKT_PTEPDE_RMW_HEADER_gpa_mask) << SDMA_PKT_PTEPDE_RMW_HEADER_gpa_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_mask) << SDMA_PKT_PTEPDE_RMW_ADDR_HI_addr_63_32_shift) + +/*define for MASK_LO word*/ +/*define for mask_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_offset 3 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_LO_MASK_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_mask) << SDMA_PKT_PTEPDE_RMW_MASK_LO_mask_31_0_shift) + +/*define for MASK_HI word*/ +/*define for mask_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_offset 4 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_MASK_HI_MASK_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_mask) << SDMA_PKT_PTEPDE_RMW_MASK_HI_mask_63_32_shift) + +/*define for VALUE_LO word*/ +/*define for value_31_0 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_offset 5 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_LO_VALUE_31_0(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_LO_value_31_0_shift) + +/*define for VALUE_HI word*/ +/*define for value_63_32 field*/ +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_offset 6 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift 0 +#define SDMA_PKT_PTEPDE_RMW_VALUE_HI_VALUE_63_32(x) (((x) & SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_mask) << SDMA_PKT_PTEPDE_RMW_VALUE_HI_value_63_32_shift) + + +/* +** Definitions for SDMA_PKT_WRITE_INCR packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_op_shift 0 +#define SDMA_PKT_WRITE_INCR_HEADER_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_offset 0 +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift 8 +#define SDMA_PKT_WRITE_INCR_HEADER_SUB_OP(x) (((x) & SDMA_PKT_WRITE_INCR_HEADER_sub_op_mask) << SDMA_PKT_WRITE_INCR_HEADER_sub_op_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_WRITE_INCR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_WRITE_INCR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for MASK_DW0 word*/ +/*define for mask_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_offset 3 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW0_MASK_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_mask) << SDMA_PKT_WRITE_INCR_MASK_DW0_mask_dw0_shift) + +/*define for MASK_DW1 word*/ +/*define for mask_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_offset 4 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_MASK_DW1_MASK_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_mask) << SDMA_PKT_WRITE_INCR_MASK_DW1_mask_dw1_shift) + +/*define for INIT_DW0 word*/ +/*define for init_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_offset 5 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW0_INIT_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_mask) << SDMA_PKT_WRITE_INCR_INIT_DW0_init_dw0_shift) + +/*define for INIT_DW1 word*/ +/*define for init_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_offset 6 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INIT_DW1_INIT_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_mask) << SDMA_PKT_WRITE_INCR_INIT_DW1_init_dw1_shift) + +/*define for INCR_DW0 word*/ +/*define for incr_dw0 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_offset 7 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW0_INCR_DW0(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_mask) << SDMA_PKT_WRITE_INCR_INCR_DW0_incr_dw0_shift) + +/*define for INCR_DW1 word*/ +/*define for incr_dw1 field*/ +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_offset 8 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask 0xFFFFFFFF +#define SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift 0 +#define SDMA_PKT_WRITE_INCR_INCR_DW1_INCR_DW1(x) (((x) & SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_mask) << SDMA_PKT_WRITE_INCR_INCR_DW1_incr_dw1_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_WRITE_INCR_COUNT_count_offset 9 +#define SDMA_PKT_WRITE_INCR_COUNT_count_mask 0x0007FFFF +#define SDMA_PKT_WRITE_INCR_COUNT_count_shift 0 +#define SDMA_PKT_WRITE_INCR_COUNT_COUNT(x) (((x) & SDMA_PKT_WRITE_INCR_COUNT_count_mask) << SDMA_PKT_WRITE_INCR_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_INDIRECT packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_INDIRECT_HEADER_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_op_shift 0 +#define SDMA_PKT_INDIRECT_HEADER_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_op_mask) << SDMA_PKT_INDIRECT_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_INDIRECT_HEADER_sub_op_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_INDIRECT_HEADER_sub_op_shift 8 +#define SDMA_PKT_INDIRECT_HEADER_SUB_OP(x) (((x) & SDMA_PKT_INDIRECT_HEADER_sub_op_mask) << SDMA_PKT_INDIRECT_HEADER_sub_op_shift) + +/*define for vmid field*/ +#define SDMA_PKT_INDIRECT_HEADER_vmid_offset 0 +#define SDMA_PKT_INDIRECT_HEADER_vmid_mask 0x0000000F +#define SDMA_PKT_INDIRECT_HEADER_vmid_shift 16 +#define SDMA_PKT_INDIRECT_HEADER_VMID(x) (((x) & SDMA_PKT_INDIRECT_HEADER_vmid_mask) << SDMA_PKT_INDIRECT_HEADER_vmid_shift) + +/*define for BASE_LO word*/ +/*define for ib_base_31_0 field*/ +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_offset 1 +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift 0 +#define SDMA_PKT_INDIRECT_BASE_LO_IB_BASE_31_0(x) (((x) & SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_mask) << SDMA_PKT_INDIRECT_BASE_LO_ib_base_31_0_shift) + +/*define for BASE_HI word*/ +/*define for ib_base_63_32 field*/ +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_offset 2 +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift 0 +#define SDMA_PKT_INDIRECT_BASE_HI_IB_BASE_63_32(x) (((x) & SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_mask) << SDMA_PKT_INDIRECT_BASE_HI_ib_base_63_32_shift) + +/*define for IB_SIZE word*/ +/*define for ib_size field*/ +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_offset 3 +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask 0x000FFFFF +#define SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift 0 +#define SDMA_PKT_INDIRECT_IB_SIZE_IB_SIZE(x) (((x) & SDMA_PKT_INDIRECT_IB_SIZE_ib_size_mask) << SDMA_PKT_INDIRECT_IB_SIZE_ib_size_shift) + +/*define for CSA_ADDR_LO word*/ +/*define for csa_addr_31_0 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_offset 4 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_LO_CSA_ADDR_31_0(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_LO_csa_addr_31_0_shift) + +/*define for CSA_ADDR_HI word*/ +/*define for csa_addr_63_32 field*/ +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_offset 5 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift 0 +#define SDMA_PKT_INDIRECT_CSA_ADDR_HI_CSA_ADDR_63_32(x) (((x) & SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_mask) << SDMA_PKT_INDIRECT_CSA_ADDR_HI_csa_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_SEMAPHORE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_op_shift 0 +#define SDMA_PKT_SEMAPHORE_HEADER_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SEMAPHORE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_sub_op_mask) << SDMA_PKT_SEMAPHORE_HEADER_sub_op_shift) + +/*define for write_one field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_write_one_shift 29 +#define SDMA_PKT_SEMAPHORE_HEADER_WRITE_ONE(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_write_one_mask) << SDMA_PKT_SEMAPHORE_HEADER_write_one_shift) + +/*define for signal field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_signal_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_signal_shift 30 +#define SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_signal_mask) << SDMA_PKT_SEMAPHORE_HEADER_signal_shift) + +/*define for mailbox field*/ +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_offset 0 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask 0x00000001 +#define SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift 31 +#define SDMA_PKT_SEMAPHORE_HEADER_MAILBOX(x) (((x) & SDMA_PKT_SEMAPHORE_HEADER_mailbox_mask) << SDMA_PKT_SEMAPHORE_HEADER_mailbox_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_SEMAPHORE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_SEMAPHORE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_SEMAPHORE_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_FENCE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_FENCE_HEADER_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_op_shift 0 +#define SDMA_PKT_FENCE_HEADER_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_op_mask) << SDMA_PKT_FENCE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_FENCE_HEADER_sub_op_offset 0 +#define SDMA_PKT_FENCE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_FENCE_HEADER_sub_op_shift 8 +#define SDMA_PKT_FENCE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_FENCE_HEADER_sub_op_mask) << SDMA_PKT_FENCE_HEADER_sub_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_FENCE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_FENCE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_FENCE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_FENCE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_FENCE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_FENCE_ADDR_HI_addr_63_32_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_FENCE_DATA_data_offset 3 +#define SDMA_PKT_FENCE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_FENCE_DATA_data_shift 0 +#define SDMA_PKT_FENCE_DATA_DATA(x) (((x) & SDMA_PKT_FENCE_DATA_data_mask) << SDMA_PKT_FENCE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_SRBM_WRITE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_op_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_op_shift 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift 8 +#define SDMA_PKT_SRBM_WRITE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_sub_op_mask) << SDMA_PKT_SRBM_WRITE_HEADER_sub_op_shift) + +/*define for byte_en field*/ +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_offset 0 +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask 0x0000000F +#define SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift 28 +#define SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(x) (((x) & SDMA_PKT_SRBM_WRITE_HEADER_byte_en_mask) << SDMA_PKT_SRBM_WRITE_HEADER_byte_en_shift) + +/*define for ADDR word*/ +/*define for addr field*/ +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_offset 1 +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_mask 0x0003FFFF +#define SDMA_PKT_SRBM_WRITE_ADDR_addr_shift 0 +#define SDMA_PKT_SRBM_WRITE_ADDR_ADDR(x) (((x) & SDMA_PKT_SRBM_WRITE_ADDR_addr_mask) << SDMA_PKT_SRBM_WRITE_ADDR_addr_shift) + +/*define for DATA word*/ +/*define for data field*/ +#define SDMA_PKT_SRBM_WRITE_DATA_data_offset 2 +#define SDMA_PKT_SRBM_WRITE_DATA_data_mask 0xFFFFFFFF +#define SDMA_PKT_SRBM_WRITE_DATA_data_shift 0 +#define SDMA_PKT_SRBM_WRITE_DATA_DATA(x) (((x) & SDMA_PKT_SRBM_WRITE_DATA_data_mask) << SDMA_PKT_SRBM_WRITE_DATA_data_shift) + + +/* +** Definitions for SDMA_PKT_PRE_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_op_shift 0 +#define SDMA_PKT_PRE_EXE_HEADER_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_op_mask) << SDMA_PKT_PRE_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_PRE_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_sub_op_mask) << SDMA_PKT_PRE_EXE_HEADER_sub_op_shift) + +/*define for dev_sel field*/ +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_offset 0 +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask 0x000000FF +#define SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift 16 +#define SDMA_PKT_PRE_EXE_HEADER_DEV_SEL(x) (((x) & SDMA_PKT_PRE_EXE_HEADER_dev_sel_mask) << SDMA_PKT_PRE_EXE_HEADER_dev_sel_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_offset 1 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_PRE_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_PRE_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_COND_EXE packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_COND_EXE_HEADER_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_op_shift 0 +#define SDMA_PKT_COND_EXE_HEADER_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_op_mask) << SDMA_PKT_COND_EXE_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_COND_EXE_HEADER_sub_op_offset 0 +#define SDMA_PKT_COND_EXE_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_COND_EXE_HEADER_sub_op_shift 8 +#define SDMA_PKT_COND_EXE_HEADER_SUB_OP(x) (((x) & SDMA_PKT_COND_EXE_HEADER_sub_op_mask) << SDMA_PKT_COND_EXE_HEADER_sub_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_mask) << SDMA_PKT_COND_EXE_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_COND_EXE_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_mask) << SDMA_PKT_COND_EXE_ADDR_HI_addr_63_32_shift) + +/*define for REFERENCE word*/ +/*define for reference field*/ +#define SDMA_PKT_COND_EXE_REFERENCE_reference_offset 3 +#define SDMA_PKT_COND_EXE_REFERENCE_reference_mask 0xFFFFFFFF +#define SDMA_PKT_COND_EXE_REFERENCE_reference_shift 0 +#define SDMA_PKT_COND_EXE_REFERENCE_REFERENCE(x) (((x) & SDMA_PKT_COND_EXE_REFERENCE_reference_mask) << SDMA_PKT_COND_EXE_REFERENCE_reference_shift) + +/*define for EXEC_COUNT word*/ +/*define for exec_count field*/ +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_offset 4 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask 0x00003FFF +#define SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift 0 +#define SDMA_PKT_COND_EXE_EXEC_COUNT_EXEC_COUNT(x) (((x) & SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_mask) << SDMA_PKT_COND_EXE_EXEC_COUNT_exec_count_shift) + + +/* +** Definitions for SDMA_PKT_CONSTANT_FILL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_op_shift 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift 8 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sub_op_shift) + +/*define for sw field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift 16 +#define SDMA_PKT_CONSTANT_FILL_HEADER_SW(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_sw_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_sw_shift) + +/*define for fillsize field*/ +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_offset 0 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask 0x00000003 +#define SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift 30 +#define SDMA_PKT_CONSTANT_FILL_HEADER_FILLSIZE(x) (((x) & SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_mask) << SDMA_PKT_CONSTANT_FILL_HEADER_fillsize_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_offset 1 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_offset 2 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_CONSTANT_FILL_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for DATA word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_offset 3 +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift 0 +#define SDMA_PKT_CONSTANT_FILL_DATA_SRC_DATA_31_0(x) (((x) & SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_mask) << SDMA_PKT_CONSTANT_FILL_DATA_src_data_31_0_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_offset 4 +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_mask 0x003FFFFF +#define SDMA_PKT_CONSTANT_FILL_COUNT_count_shift 0 +#define SDMA_PKT_CONSTANT_FILL_COUNT_COUNT(x) (((x) & SDMA_PKT_CONSTANT_FILL_COUNT_count_mask) << SDMA_PKT_CONSTANT_FILL_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_DATA_FILL_MULTI packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift 8 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_sub_op_shift) + +/*define for memlog_clr field*/ +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_offset 0 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask 0x00000001 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift 31 +#define SDMA_PKT_DATA_FILL_MULTI_HEADER_MEMLOG_CLR(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_mask) << SDMA_PKT_DATA_FILL_MULTI_HEADER_memlog_clr_shift) + +/*define for BYTE_STRIDE word*/ +/*define for byte_stride field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_offset 1 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_BYTE_STRIDE(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_STRIDE_byte_stride_shift) + +/*define for DMA_COUNT word*/ +/*define for dma_count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_offset 2 +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_DMA_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_mask) << SDMA_PKT_DATA_FILL_MULTI_DMA_COUNT_dma_count_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_offset 3 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_offset 4 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_PKT_DATA_FILL_MULTI_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for BYTE_COUNT word*/ +/*define for count field*/ +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_offset 5 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask 0x03FFFFFF +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift 0 +#define SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_COUNT(x) (((x) & SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_mask) << SDMA_PKT_DATA_FILL_MULTI_BYTE_COUNT_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REGMEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REGMEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REGMEM_HEADER_sub_op_shift) + +/*define for hdp_flush field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift 26 +#define SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_mask) << SDMA_PKT_POLL_REGMEM_HEADER_hdp_flush_shift) + +/*define for func field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_func_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_mask 0x00000007 +#define SDMA_PKT_POLL_REGMEM_HEADER_func_shift 28 +#define SDMA_PKT_POLL_REGMEM_HEADER_FUNC(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_func_mask) << SDMA_PKT_POLL_REGMEM_HEADER_func_shift) + +/*define for mem_poll field*/ +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_offset 0 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask 0x00000001 +#define SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift 31 +#define SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(x) (((x) & SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_mask) << SDMA_PKT_POLL_REGMEM_HEADER_mem_poll_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_REGMEM_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REGMEM_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REGMEM_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REGMEM_ADDR_HI_addr_63_32_shift) + +/*define for VALUE word*/ +/*define for value field*/ +#define SDMA_PKT_POLL_REGMEM_VALUE_value_offset 3 +#define SDMA_PKT_POLL_REGMEM_VALUE_value_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_VALUE_value_shift 0 +#define SDMA_PKT_POLL_REGMEM_VALUE_VALUE(x) (((x) & SDMA_PKT_POLL_REGMEM_VALUE_value_mask) << SDMA_PKT_POLL_REGMEM_VALUE_value_shift) + +/*define for MASK word*/ +/*define for mask field*/ +#define SDMA_PKT_POLL_REGMEM_MASK_mask_offset 4 +#define SDMA_PKT_POLL_REGMEM_MASK_mask_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REGMEM_MASK_mask_shift 0 +#define SDMA_PKT_POLL_REGMEM_MASK_MASK(x) (((x) & SDMA_PKT_POLL_REGMEM_MASK_mask_mask) << SDMA_PKT_POLL_REGMEM_MASK_mask_shift) + +/*define for DW5 word*/ +/*define for interval field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_interval_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_interval_mask 0x0000FFFF +#define SDMA_PKT_POLL_REGMEM_DW5_interval_shift 0 +#define SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_interval_mask) << SDMA_PKT_POLL_REGMEM_DW5_interval_shift) + +/*define for retry_count field*/ +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_offset 5 +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask 0x00000FFF +#define SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift 16 +#define SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(x) (((x) & SDMA_PKT_POLL_REGMEM_DW5_retry_count_mask) << SDMA_PKT_POLL_REGMEM_DW5_retry_count_shift) + + +/* +** Definitions for SDMA_PKT_POLL_REG_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_HEADER_sub_op_shift) + +/*define for SRC_ADDR word*/ +/*define for addr_31_2 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_offset 1 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask 0x3FFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_ADDR_31_2(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_SRC_ADDR_addr_31_2_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 2 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 3 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_REG_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_POLL_DBIT_WRITE_MEM packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_sub_op_shift) + +/*define for ea field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_offset 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask 0x00000003 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift 16 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_EA(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_HEADER_ea_shift) + +/*define for DST_ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_LO_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_DST_ADDR_HI_addr_63_32_shift) + +/*define for START_PAGE word*/ +/*define for addr_31_4 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_offset 3 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask 0x0FFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_ADDR_31_4(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_START_PAGE_addr_31_4_shift) + +/*define for PAGE_NUM word*/ +/*define for page_num_31_0 field*/ +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_offset 4 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift 0 +#define SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_PAGE_NUM_31_0(x) (((x) & SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_mask) << SDMA_PKT_POLL_DBIT_WRITE_MEM_PAGE_NUM_page_num_31_0_shift) + + +/* +** Definitions for SDMA_PKT_POLL_MEM_VERIFY packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift 8 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_SUB_OP(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_sub_op_shift) + +/*define for mode field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_offset 0 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask 0x00000001 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift 31 +#define SDMA_PKT_POLL_MEM_VERIFY_HEADER_MODE(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_mask) << SDMA_PKT_POLL_MEM_VERIFY_HEADER_mode_shift) + +/*define for PATTERN word*/ +/*define for pattern field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_offset 1 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_PATTERN_PATTERN(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_mask) << SDMA_PKT_POLL_MEM_VERIFY_PATTERN_pattern_shift) + +/*define for CMP0_ADDR_START_LO word*/ +/*define for cmp0_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_offset 2 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_CMP0_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_LO_cmp0_start_31_0_shift) + +/*define for CMP0_ADDR_START_HI word*/ +/*define for cmp0_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_offset 3 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_CMP0_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_START_HI_cmp0_start_63_32_shift) + +/*define for CMP0_ADDR_END_LO word*/ +/*define for cmp1_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_offset 4 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_LO_cmp1_end_31_0_shift) + +/*define for CMP0_ADDR_END_HI word*/ +/*define for cmp1_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_offset 5 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP0_ADDR_END_HI_cmp1_end_63_32_shift) + +/*define for CMP1_ADDR_START_LO word*/ +/*define for cmp1_start_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_offset 6 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_CMP1_START_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_LO_cmp1_start_31_0_shift) + +/*define for CMP1_ADDR_START_HI word*/ +/*define for cmp1_start_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_offset 7 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_CMP1_START_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_START_HI_cmp1_start_63_32_shift) + +/*define for CMP1_ADDR_END_LO word*/ +/*define for cmp1_end_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_offset 8 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_CMP1_END_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_LO_cmp1_end_31_0_shift) + +/*define for CMP1_ADDR_END_HI word*/ +/*define for cmp1_end_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_offset 9 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_CMP1_END_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_CMP1_ADDR_END_HI_cmp1_end_63_32_shift) + +/*define for REC_ADDR_LO word*/ +/*define for rec_31_0 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_offset 10 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_REC_31_0(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_LO_rec_31_0_shift) + +/*define for REC_ADDR_HI word*/ +/*define for rec_63_32 field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_offset 11 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_REC_63_32(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_mask) << SDMA_PKT_POLL_MEM_VERIFY_REC_ADDR_HI_rec_63_32_shift) + +/*define for RESERVED word*/ +/*define for reserved field*/ +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_offset 12 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask 0xFFFFFFFF +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift 0 +#define SDMA_PKT_POLL_MEM_VERIFY_RESERVED_RESERVED(x) (((x) & SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_mask) << SDMA_PKT_POLL_MEM_VERIFY_RESERVED_reserved_shift) + + +/* +** Definitions for SDMA_PKT_ATOMIC packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_ATOMIC_HEADER_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_op_mask 0x000000FF +#define SDMA_PKT_ATOMIC_HEADER_op_shift 0 +#define SDMA_PKT_ATOMIC_HEADER_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_op_mask) << SDMA_PKT_ATOMIC_HEADER_op_shift) + +/*define for loop field*/ +#define SDMA_PKT_ATOMIC_HEADER_loop_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_loop_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_loop_shift 16 +#define SDMA_PKT_ATOMIC_HEADER_LOOP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_loop_mask) << SDMA_PKT_ATOMIC_HEADER_loop_shift) + +/*define for tmz field*/ +#define SDMA_PKT_ATOMIC_HEADER_tmz_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_tmz_mask 0x00000001 +#define SDMA_PKT_ATOMIC_HEADER_tmz_shift 18 +#define SDMA_PKT_ATOMIC_HEADER_TMZ(x) (((x) & SDMA_PKT_ATOMIC_HEADER_tmz_mask) << SDMA_PKT_ATOMIC_HEADER_tmz_shift) + +/*define for atomic_op field*/ +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_offset 0 +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_mask 0x0000007F +#define SDMA_PKT_ATOMIC_HEADER_atomic_op_shift 25 +#define SDMA_PKT_ATOMIC_HEADER_ATOMIC_OP(x) (((x) & SDMA_PKT_ATOMIC_HEADER_atomic_op_mask) << SDMA_PKT_ATOMIC_HEADER_atomic_op_shift) + +/*define for ADDR_LO word*/ +/*define for addr_31_0 field*/ +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_offset 1 +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_LO_ADDR_31_0(x) (((x) & SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_mask) << SDMA_PKT_ATOMIC_ADDR_LO_addr_31_0_shift) + +/*define for ADDR_HI word*/ +/*define for addr_63_32 field*/ +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_offset 2 +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift 0 +#define SDMA_PKT_ATOMIC_ADDR_HI_ADDR_63_32(x) (((x) & SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_mask) << SDMA_PKT_ATOMIC_ADDR_HI_addr_63_32_shift) + +/*define for SRC_DATA_LO word*/ +/*define for src_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_offset 3 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_LO_SRC_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_mask) << SDMA_PKT_ATOMIC_SRC_DATA_LO_src_data_31_0_shift) + +/*define for SRC_DATA_HI word*/ +/*define for src_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_offset 4 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_SRC_DATA_HI_SRC_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_mask) << SDMA_PKT_ATOMIC_SRC_DATA_HI_src_data_63_32_shift) + +/*define for CMP_DATA_LO word*/ +/*define for cmp_data_31_0 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_offset 5 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_LO_CMP_DATA_31_0(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_mask) << SDMA_PKT_ATOMIC_CMP_DATA_LO_cmp_data_31_0_shift) + +/*define for CMP_DATA_HI word*/ +/*define for cmp_data_63_32 field*/ +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_offset 6 +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift 0 +#define SDMA_PKT_ATOMIC_CMP_DATA_HI_CMP_DATA_63_32(x) (((x) & SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_mask) << SDMA_PKT_ATOMIC_CMP_DATA_HI_cmp_data_63_32_shift) + +/*define for LOOP_INTERVAL word*/ +/*define for loop_interval field*/ +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_offset 7 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask 0x00001FFF +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift 0 +#define SDMA_PKT_ATOMIC_LOOP_INTERVAL_LOOP_INTERVAL(x) (((x) & SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_mask) << SDMA_PKT_ATOMIC_LOOP_INTERVAL_loop_interval_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_SET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_SET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_SET_HEADER_sub_op_shift) + +/*define for INIT_DATA_LO word*/ +/*define for init_data_31_0 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_offset 1 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_INIT_DATA_31_0(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_LO_init_data_31_0_shift) + +/*define for INIT_DATA_HI word*/ +/*define for init_data_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_INIT_DATA_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_mask) << SDMA_PKT_TIMESTAMP_SET_INIT_DATA_HI_init_data_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_HEADER_sub_op_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TIMESTAMP_GET_GLOBAL packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_offset 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift 8 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_HEADER_sub_op_shift) + +/*define for WRITE_ADDR_LO word*/ +/*define for write_addr_31_3 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_offset 1 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask 0x1FFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift 3 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_WRITE_ADDR_31_3(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_LO_write_addr_31_3_shift) + +/*define for WRITE_ADDR_HI word*/ +/*define for write_addr_63_32 field*/ +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_offset 2 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask 0xFFFFFFFF +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift 0 +#define SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_WRITE_ADDR_63_32(x) (((x) & SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_mask) << SDMA_PKT_TIMESTAMP_GET_GLOBAL_WRITE_ADDR_HI_write_addr_63_32_shift) + + +/* +** Definitions for SDMA_PKT_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_op_mask) << SDMA_PKT_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_TRAP_HEADER_sub_op_mask) << SDMA_PKT_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_DUMMY_TRAP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_op_shift 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_offset 0 +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift 8 +#define SDMA_PKT_DUMMY_TRAP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_mask) << SDMA_PKT_DUMMY_TRAP_HEADER_sub_op_shift) + +/*define for INT_CONTEXT word*/ +/*define for int_context field*/ +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_offset 1 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask 0x0FFFFFFF +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift 0 +#define SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_INT_CONTEXT(x) (((x) & SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_mask) << SDMA_PKT_DUMMY_TRAP_INT_CONTEXT_int_context_shift) + + +/* +** Definitions for SDMA_PKT_NOP packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_NOP_HEADER_op_offset 0 +#define SDMA_PKT_NOP_HEADER_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_op_shift 0 +#define SDMA_PKT_NOP_HEADER_OP(x) (((x) & SDMA_PKT_NOP_HEADER_op_mask) << SDMA_PKT_NOP_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_NOP_HEADER_sub_op_offset 0 +#define SDMA_PKT_NOP_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_NOP_HEADER_sub_op_shift 8 +#define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift) + +/*define for count field*/ +#define SDMA_PKT_NOP_HEADER_count_offset 0 +#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF +#define SDMA_PKT_NOP_HEADER_count_shift 16 +#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift) + +/*define for DATA0 word*/ +/*define for data0 field*/ +#define SDMA_PKT_NOP_DATA0_data0_offset 1 +#define SDMA_PKT_NOP_DATA0_data0_mask 0xFFFFFFFF +#define SDMA_PKT_NOP_DATA0_data0_shift 0 +#define SDMA_PKT_NOP_DATA0_DATA0(x) (((x) & SDMA_PKT_NOP_DATA0_data0_mask) << SDMA_PKT_NOP_DATA0_data0_shift) + + +/* +** Definitions for SDMA_AQL_PKT_HEADER packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_format_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_HEADER_HEADER_format_shift 0 +#define SDMA_AQL_PKT_HEADER_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_format_mask) << SDMA_AQL_PKT_HEADER_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_HEADER_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_HEADER_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_barrier_mask) << SDMA_AQL_PKT_HEADER_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_HEADER_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_HEADER_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_HEADER_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_HEADER_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_reserved_mask) << SDMA_AQL_PKT_HEADER_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_op_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_HEADER_HEADER_op_shift 16 +#define SDMA_AQL_PKT_HEADER_HEADER_OP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_op_mask) << SDMA_AQL_PKT_HEADER_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_HEADER_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_HEADER_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_HEADER_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_HEADER_HEADER_subop_mask) << SDMA_AQL_PKT_HEADER_HEADER_subop_shift) + + +/* +** Definitions for SDMA_AQL_PKT_COPY_LINEAR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_COPY_LINEAR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_mask) << SDMA_AQL_PKT_COPY_LINEAR_HEADER_subop_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW1_reserved_dw1_shift) + +/*define for RETURN_ADDR_LO word*/ +/*define for return_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_offset 2 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_RETURN_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_LO_return_addr_31_0_shift) + +/*define for RETURN_ADDR_HI word*/ +/*define for return_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_offset 3 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_RETURN_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_RETURN_ADDR_HI_return_addr_63_32_shift) + +/*define for COUNT word*/ +/*define for count field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_offset 4 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask 0x003FFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COUNT_COUNT(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_mask) << SDMA_AQL_PKT_COPY_LINEAR_COUNT_count_shift) + +/*define for PARAMETER word*/ +/*define for dst_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift 16 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_DST_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_dst_sw_shift) + +/*define for src_sw field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_offset 5 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask 0x00000003 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift 24 +#define SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_SRC_SW(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_mask) << SDMA_AQL_PKT_COPY_LINEAR_PARAMETER_src_sw_shift) + +/*define for SRC_ADDR_LO word*/ +/*define for src_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_offset 6 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_SRC_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_LO_src_addr_31_0_shift) + +/*define for SRC_ADDR_HI word*/ +/*define for src_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_offset 7 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_SRC_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_SRC_ADDR_HI_src_addr_63_32_shift) + +/*define for DST_ADDR_LO word*/ +/*define for dst_addr_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_offset 8 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_DST_ADDR_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_LO_dst_addr_31_0_shift) + +/*define for DST_ADDR_HI word*/ +/*define for dst_addr_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_offset 9 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_DST_ADDR_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_DST_ADDR_HI_dst_addr_63_32_shift) + +/*define for RESERVED_DW10 word*/ +/*define for reserved_dw10 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_offset 10 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_RESERVED_DW10(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW10_reserved_dw10_shift) + +/*define for RESERVED_DW11 word*/ +/*define for reserved_dw11 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_offset 11 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_RESERVED_DW11(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW11_reserved_dw11_shift) + +/*define for RESERVED_DW12 word*/ +/*define for reserved_dw12 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_offset 12 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW12_reserved_dw12_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_COPY_LINEAR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_COPY_LINEAR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +/* +** Definitions for SDMA_AQL_PKT_BARRIER_OR packet +*/ + +/*define for HEADER word*/ +/*define for format field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask 0x000000FF +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_FORMAT(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_format_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_format_shift) + +/*define for barrier field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask 0x00000001 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift 8 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_BARRIER(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_barrier_shift) + +/*define for acquire_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift 9 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_ACQUIRE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_acquire_fence_scope_shift) + +/*define for release_fence_scope field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask 0x00000003 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift 11 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RELEASE_FENCE_SCOPE(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_release_fence_scope_shift) + +/*define for reserved field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift 13 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_RESERVED(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_reserved_shift) + +/*define for op field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask 0x0000000F +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift 16 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_OP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_op_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_op_shift) + +/*define for subop field*/ +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_offset 0 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask 0x00000007 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift 20 +#define SDMA_AQL_PKT_BARRIER_OR_HEADER_SUBOP(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_mask) << SDMA_AQL_PKT_BARRIER_OR_HEADER_subop_shift) + +/*define for RESERVED_DW1 word*/ +/*define for reserved_dw1 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_offset 1 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_RESERVED_DW1(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW1_reserved_dw1_shift) + +/*define for DEPENDENT_ADDR_0_LO word*/ +/*define for dependent_addr_0_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_offset 2 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_DEPENDENT_ADDR_0_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_LO_dependent_addr_0_31_0_shift) + +/*define for DEPENDENT_ADDR_0_HI word*/ +/*define for dependent_addr_0_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_offset 3 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_DEPENDENT_ADDR_0_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_0_HI_dependent_addr_0_63_32_shift) + +/*define for DEPENDENT_ADDR_1_LO word*/ +/*define for dependent_addr_1_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_offset 4 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_DEPENDENT_ADDR_1_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_LO_dependent_addr_1_31_0_shift) + +/*define for DEPENDENT_ADDR_1_HI word*/ +/*define for dependent_addr_1_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_offset 5 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_DEPENDENT_ADDR_1_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_1_HI_dependent_addr_1_63_32_shift) + +/*define for DEPENDENT_ADDR_2_LO word*/ +/*define for dependent_addr_2_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_offset 6 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_DEPENDENT_ADDR_2_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_LO_dependent_addr_2_31_0_shift) + +/*define for DEPENDENT_ADDR_2_HI word*/ +/*define for dependent_addr_2_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_offset 7 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_DEPENDENT_ADDR_2_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_2_HI_dependent_addr_2_63_32_shift) + +/*define for DEPENDENT_ADDR_3_LO word*/ +/*define for dependent_addr_3_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_offset 8 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_DEPENDENT_ADDR_3_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_LO_dependent_addr_3_31_0_shift) + +/*define for DEPENDENT_ADDR_3_HI word*/ +/*define for dependent_addr_3_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_offset 9 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_DEPENDENT_ADDR_3_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_3_HI_dependent_addr_3_63_32_shift) + +/*define for DEPENDENT_ADDR_4_LO word*/ +/*define for dependent_addr_4_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_offset 10 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_DEPENDENT_ADDR_4_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_LO_dependent_addr_4_31_0_shift) + +/*define for DEPENDENT_ADDR_4_HI word*/ +/*define for dependent_addr_4_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_offset 11 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_DEPENDENT_ADDR_4_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_DEPENDENT_ADDR_4_HI_dependent_addr_4_63_32_shift) + +/*define for RESERVED_DW12 word*/ +/*define for reserved_dw12 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_offset 12 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_RESERVED_DW12(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW12_reserved_dw12_shift) + +/*define for RESERVED_DW13 word*/ +/*define for reserved_dw13 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_offset 13 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_RESERVED_DW13(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_mask) << SDMA_AQL_PKT_BARRIER_OR_RESERVED_DW13_reserved_dw13_shift) + +/*define for COMPLETION_SIGNAL_LO word*/ +/*define for completion_signal_31_0 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_offset 14 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_COMPLETION_SIGNAL_31_0(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_LO_completion_signal_31_0_shift) + +/*define for COMPLETION_SIGNAL_HI word*/ +/*define for completion_signal_63_32 field*/ +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_offset 15 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask 0xFFFFFFFF +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 +#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) + + +#endif /* __SDMA_PKT_OPEN_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 4a785d6acfb9..b1132f5e84fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -464,15 +464,9 @@ static void vi_detect_hw_virtualization(struct amdgpu_device *adev) } static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = { - {mmGB_MACROTILE_MODE7, true}, }; static const struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = { - {mmGB_TILE_MODE7, true}, - {mmGB_TILE_MODE12, true}, - {mmGB_TILE_MODE17, true}, - {mmGB_TILE_MODE23, true}, - {mmGB_MACROTILE_MODE7, true}, }; static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = { @@ -751,6 +745,11 @@ static int vi_asic_reset(struct amdgpu_device *adev) return r; } +static u32 vi_get_config_memsize(struct amdgpu_device *adev) +{ + return RREG32(mmCONFIG_MEMSIZE); +} + static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock, u32 cntl_reg, u32 status_reg) { @@ -790,6 +789,8 @@ static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) return r; r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS); + if (r) + return r; return 0; } @@ -900,8 +901,12 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, + .get_config_memsize = &vi_get_config_memsize, }; +#define CZ_REV_BRISTOL(rev) \ + ((rev >= 0xC8 && rev <= 0xCE) || (rev >= 0xE1 && rev <= 0xE6)) + static int vi_common_early_init(void *handle) { bool smc_enabled = false; @@ -1027,7 +1032,25 @@ static int vi_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x50; break; case CHIP_POLARIS12: - adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_UVD_MGCG | + AMD_CG_SUPPORT_VCE_MGCG; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x64; break; @@ -1038,7 +1061,6 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_CGTS | - AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGTS_LS | AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | @@ -1050,7 +1072,7 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_VCE_MGCG; /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; - if (adev->rev_id != 0x00) { + if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) { adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | @@ -1067,7 +1089,6 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_CGTS | - AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGTS_LS | AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | @@ -1090,8 +1111,8 @@ static int vi_common_early_init(void *handle) return -EINVAL; } - if (amdgpu_smc_load_fw && smc_enabled) - adev->firmware.smu_load = true; + /* vi use smc load by default */ + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); amdgpu_get_pcie_info(adev); @@ -1391,27 +1412,30 @@ static int vi_common_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_FIJI: vi_update_bif_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); vi_update_hdp_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); vi_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); vi_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; case CHIP_CARRIZO: case CHIP_STONEY: vi_update_bif_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); vi_update_hdp_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); vi_update_hdp_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); vi_update_drm_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); break; case CHIP_TONGA: case CHIP_POLARIS10: @@ -1435,6 +1459,9 @@ static void vi_common_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + if (amdgpu_sriov_vf(adev)) + *flags = 0; + /* AMD_CG_SUPPORT_BIF_LS */ data = RREG32_PCIE(ixPCIE_CNTL2); if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 719587b8b0cb..575d7aed5d32 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -28,116 +28,4 @@ void vi_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int vi_set_ip_blocks(struct amdgpu_device *adev); -struct amdgpu_ce_ib_state -{ - uint32_t ce_ib_completion_status; - uint32_t ce_constegnine_count; - uint32_t ce_ibOffset_ib1; - uint32_t ce_ibOffset_ib2; -}; /* Total of 4 DWORD */ - -struct amdgpu_de_ib_state -{ - uint32_t ib_completion_status; - uint32_t de_constEngine_count; - uint32_t ib_offset_ib1; - uint32_t ib_offset_ib2; - uint32_t preamble_begin_ib1; - uint32_t preamble_begin_ib2; - uint32_t preamble_end_ib1; - uint32_t preamble_end_ib2; - uint32_t draw_indirect_baseLo; - uint32_t draw_indirect_baseHi; - uint32_t disp_indirect_baseLo; - uint32_t disp_indirect_baseHi; - uint32_t gds_backup_addrlo; - uint32_t gds_backup_addrhi; - uint32_t index_base_addrlo; - uint32_t index_base_addrhi; - uint32_t sample_cntl; -}; /* Total of 17 DWORD */ - -struct amdgpu_ce_ib_state_chained_ib -{ - /* section of non chained ib part */ - uint32_t ce_ib_completion_status; - uint32_t ce_constegnine_count; - uint32_t ce_ibOffset_ib1; - uint32_t ce_ibOffset_ib2; - - /* section of chained ib */ - uint32_t ce_chainib_addrlo_ib1; - uint32_t ce_chainib_addrlo_ib2; - uint32_t ce_chainib_addrhi_ib1; - uint32_t ce_chainib_addrhi_ib2; - uint32_t ce_chainib_size_ib1; - uint32_t ce_chainib_size_ib2; -}; /* total 10 DWORD */ - -struct amdgpu_de_ib_state_chained_ib -{ - /* section of non chained ib part */ - uint32_t ib_completion_status; - uint32_t de_constEngine_count; - uint32_t ib_offset_ib1; - uint32_t ib_offset_ib2; - - /* section of chained ib */ - uint32_t chain_ib_addrlo_ib1; - uint32_t chain_ib_addrlo_ib2; - uint32_t chain_ib_addrhi_ib1; - uint32_t chain_ib_addrhi_ib2; - uint32_t chain_ib_size_ib1; - uint32_t chain_ib_size_ib2; - - /* section of non chained ib part */ - uint32_t preamble_begin_ib1; - uint32_t preamble_begin_ib2; - uint32_t preamble_end_ib1; - uint32_t preamble_end_ib2; - - /* section of chained ib */ - uint32_t chain_ib_pream_addrlo_ib1; - uint32_t chain_ib_pream_addrlo_ib2; - uint32_t chain_ib_pream_addrhi_ib1; - uint32_t chain_ib_pream_addrhi_ib2; - - /* section of non chained ib part */ - uint32_t draw_indirect_baseLo; - uint32_t draw_indirect_baseHi; - uint32_t disp_indirect_baseLo; - uint32_t disp_indirect_baseHi; - uint32_t gds_backup_addrlo; - uint32_t gds_backup_addrhi; - uint32_t index_base_addrlo; - uint32_t index_base_addrhi; - uint32_t sample_cntl; -}; /* Total of 27 DWORD */ - -struct amdgpu_gfx_meta_data -{ - /* 4 DWORD, address must be 4KB aligned */ - struct amdgpu_ce_ib_state ce_payload; - uint32_t reserved1[60]; - /* 17 DWORD, address must be 64B aligned */ - struct amdgpu_de_ib_state de_payload; - /* PFP IB base address which get pre-empted */ - uint32_t DeIbBaseAddrLo; - uint32_t DeIbBaseAddrHi; - uint32_t reserved2[941]; -}; /* Total of 4K Bytes */ - -struct amdgpu_gfx_meta_data_chained_ib -{ - /* 10 DWORD, address must be 4KB aligned */ - struct amdgpu_ce_ib_state_chained_ib ce_payload; - uint32_t reserved1[54]; - /* 27 DWORD, address must be 64B aligned */ - struct amdgpu_de_ib_state_chained_ib de_payload; - /* PFP IB base address which get pre-empted */ - uint32_t DeIbBaseAddrLo; - uint32_t DeIbBaseAddrHi; - uint32_t reserved2[931]; -}; /* Total of 4K Bytes */ - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index 7a3863a45f0a..5f2ab9c1609a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -195,6 +195,7 @@ * 1 - Stream * 2 - Bypass */ +#define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) #define PACKET3_COPY_DATA 0x40 #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 @@ -361,7 +362,89 @@ #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 #define PACKET3_SWITCH_BUFFER 0x8B #define PACKET3_SET_RESOURCES 0xA0 +/* 1. header + * 2. CONTROL + * 3. QUEUE_MASK_LO [31:0] + * 4. QUEUE_MASK_HI [31:0] + * 5. GWS_MASK_LO [31:0] + * 6. GWS_MASK_HI [31:0] + * 7. OAC_MASK [15:0] + * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0] + */ +# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) +# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) +# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) #define PACKET3_MAP_QUEUES 0xA2 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. MQD_ADDR_LO [31:0] + * 5. MQD_ADDR_HI [31:0] + * 6. WPTR_ADDR_LO [31:0] + * 7. WPTR_ADDR_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8) +# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) +# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) +# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2 */ +# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) +# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) +# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 26) +# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 29) +# define PACKET3_MAP_QUEUES_ME(x) ((x) << 31) +#define PACKET3_UNMAP_QUEUES 0xA3 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. CONTROL3 + * 5. CONTROL4 + * 6. CONTROL5 + */ +/* CONTROL */ +# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0) + /* 0 - PREEMPT_QUEUES + * 1 - RESET_QUEUES + * 2 - DISABLE_PROCESS_QUEUES + * 3 - PREEMPT_QUEUES_NO_UNMAP + */ +# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2a */ +# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2) +/* CONTROL3a */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2) +/* CONTROL3b */ +# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0) +/* CONTROL4 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2) +/* CONTROL5 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2) +#define PACKET3_QUERY_STATUS 0xA4 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. ADDR_LO [31:0] + * 5. ADDR_HI [31:0] + * 6. DATA_LO [31:0] + * 7. DATA_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0) +# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28) +# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30) +/* CONTROL2a */ +# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) +# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) + #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 |