author     Linus Torvalds <torvalds@linux-foundation.org>   2020-12-14 11:07:56 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2020-12-14 11:07:56 -0800
commit     1d36dffa5d887715dacca0f717f4519b7be5e498
tree       a68f7c00dbb3036a67806ed6c6b8cc61c3cff60d  /drivers/gpu/drm/amd/amdgpu
parent     2c85ebc57b3e1817b6ce1a6b703928e113a90442
parent     b10733527bfd864605c33ab2e9a886eec317ec39
Merge tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Not a huge amount of big things here, AMD has support for a few new HW
variants (vangogh, green sardine, dimgrey cavefish), Intel has some
more DG1 enablement. We have a few big reworks of the TTM layers and
interfaces, GEM and atomic internal API reworks cross tree. fbdev is
marked orphaned in here as well to reflect the current reality.
core:
- documentation updates
- deprecate DRM_FORMAT_MOD_NONE (see the sketch after this list)
- atomic crtc enable/disable rework
- GEM convert drivers to gem object functions
- remove SCATTER_LIST_MAX_SEGMENT
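The DRM_FORMAT_MOD_NONE deprecation above mostly changes how drivers spell
the linear layout. A minimal, hypothetical sketch (not taken from this merge;
the array name is made up) of the preferred spelling when a driver advertises
plane modifiers:

#include <drm/drm_fourcc.h>

/*
 * Hypothetical example: a driver that only supports linear scanout
 * advertises DRM_FORMAT_MOD_LINEAR explicitly instead of the now
 * deprecated DRM_FORMAT_MOD_NONE (both evaluate to 0).
 */
static const u64 example_plane_modifiers[] = {
	DRM_FORMAT_MOD_LINEAR,
	DRM_FORMAT_MOD_INVALID,	/* sentinel terminating the modifier list */
};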
sched:
- avoid infinite waits
ttm:
- remove AGP support
- don't modify caching for swapout
- ttm pinning rework
- major TTM reworks
- new backend allocator
- multihop support
vram-helper:
- top down BO placement fix
- TTM changes
- GEM object support
displayport:
- DP 2.0 DPCD prep work
- DP MST extended DPCD caps
fbdev:
- mark as orphaned
amdgpu:
- Initial Vangogh support
- Green Sardine support
- Dimgrey Cavefish support
- SG display support for Renoir
- SMU7 improvements
- gfx9+ modifier support (see the sketch after this list)
- CI BACO fixes
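The gfx9+ modifier support listed above is implemented in the
amdgpu_display.c hunk further down (convert_tiling_flags_to_modifier()). As a
condensed, hypothetical sketch of the pattern that code follows, a format
modifier is assembled from AMD_FMT_MOD_SET() fields; the helper name and
parameters here are placeholders:

#include <drm/drm_fourcc.h>

/*
 * Hypothetical helper showing how an AMD format modifier is composed.
 * The real derivation from the BO tiling flags lives in
 * convert_tiling_flags_to_modifier() in the diff below.
 */
static u64 example_gfx10_modifier(int swizzle_mode, int pipe_xor_bits,
				  int bank_xor_bits, int packers)
{
	return AMD_FMT_MOD |
	       AMD_FMT_MOD_SET(TILE, swizzle_mode) |
	       AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
	       AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
	       AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) |
	       AMD_FMT_MOD_SET(PACKERS, packers);
}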
radeon:
- expose voltage via hwmon on SUMO
amdkfd:
- fix unique id handling
i915:
- more DG1 enablement
- bigjoiner support
- integer scaling filter support
- async flip support
- ICL+ DSI command mode
- Improve display shutdown
- Display refactoring
- eLLC machine fbdev loading fix
- dma scatterlist fixes
- TGL hang fixes
- eLLC display buffer caching on SKL+
- MOCS PTE setting for gen9+
msm:
- Shutdown hook
- GPU cooling device support
- DSI 7nm and 10nm phy/pll updates
- sm8150/sm8250 DPU support
- GEM locking re-work
- LLCC system cache support
aspeed:
- sysfs output config support
ast:
- LUT fix
- new display mode
gma500:
- remove 2d framebuffer accel
panfrost:
- move gpu reset to a worker
exynos:
- new HDMI mode support
mediatek:
- MT8167 support
- yaml bindings
- MIPI DSI phy code moved
etnaviv:
- new perf counter
- more lockdep annotation
hibmc:
- i2c DDC support
ingenic:
- pixel clock reset fix
- reserved memory support
- allow both DMA channels at once
- different pixel format support
- 30/24/8-bit palette modes
tilcdc:
- don't keep vblank irq enabled
vc4:
- new maintainer added
- DSI registration fix
virtio:
- blob resource support
- host visible and cross-device support
- uuid api support"
* tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm: (1754 commits)
drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs
drm/amdgpu: fix size calculation with stolen vga memory
drm/amdgpu: remove amdgpu_ttm_late_init and amdgpu_bo_late_init
drm/amdgpu: free the pre-OS console framebuffer after the first modeset
drm/amdgpu: enable runtime pm using BACO on CI dGPUs
drm/amdgpu/cik: enable BACO reset on Bonaire
drm/amd/pm: update smu10.h WORKLOAD_PPLIB setting for raven
drm/amd/pm: remove one unsupported smu function for vangogh
drm/amd/display: setup system context for APUs
drm/amd/display: add S/G support for Vangogh
drm/amdkfd: Fix leak in dmabuf import
drm/amdgpu: use AMDGPU_NUM_VMID when possible
drm/amdgpu: fix sdma instance fw version and feature version init
drm/amd/pm: update driver if version for dimgrey_cavefish
drm/amd/display: 3.2.115
drm/amd/display: [FW Promotion] Release 0.0.45
drm/amd/display: Revert DCN2.1 dram_clock_change_latency update
drm/amd/display: Enable gpu_vm_support for dcn3.01
drm/amd/display: Fixed the audio noise during mode switching with HDCP mode on
drm/amd/display: Add wm table for Renoir
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
162 files changed, 5522 insertions, 1963 deletions
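One cleanup worth calling out before the diff itself: amdgpu.h gains
ASICID_IS_P20/P21/P23/P30/P31 macros for the Polaris "kicker" device/revision
checks, and amdgpu_cgs.c is converted to use them when selecting the kicker
SMC firmware. A condensed sketch of the resulting call pattern, lifted from
the CHIP_POLARIS11 case in the amdgpu_cgs.c hunk below (fragment only, not
standalone code):

	if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) {
		info->is_kicker = true;
		strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
	} else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) {
		info->is_kicker = true;
		strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin");
	}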
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 39976c7b100c..6bf6cfaea3f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -55,7 +55,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ - amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o + amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ + amdgpu_fw_attestation.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -69,7 +70,8 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ - arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o + arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ + nbio_v7_2.o dimgrey_cavefish_reg_init.o # add DF block amdgpu-y += \ @@ -81,7 +83,7 @@ amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ - gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o + gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o # add UMC block amdgpu-y += \ @@ -165,6 +167,11 @@ amdgpu-y += \ athub_v2_0.o \ athub_v2_1.o +# add SMUIO block +amdgpu-y += \ + smuio_v9_0.o \ + smuio_v11_0.o + # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 87f095dc385c..6e2953233231 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -106,6 +106,7 @@ #include "amdgpu_mmhub.h" #include "amdgpu_gfxhub.h" #include "amdgpu_df.h" +#include "amdgpu_smuio.h" #define MAX_GPU_INSTANCE 16 @@ -193,9 +194,9 @@ extern int sched_policy; extern bool debug_evictions; extern bool no_system_mem_limit; #else -static const int sched_policy = KFD_SCHED_POLICY_HWS; -static const bool debug_evictions; /* = false */ -static const bool no_system_mem_limit; +static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; +static const bool __maybe_unused debug_evictions; /* = false */ +static const bool __maybe_unused no_system_mem_limit; #endif extern int amdgpu_tmz; @@ -623,6 +624,8 @@ struct amdgpu_asic_funcs { bool (*supports_baco)(struct amdgpu_device *adev); /* pre asic_init quirks */ void (*pre_asic_init)(struct amdgpu_device *adev); + /* enter/exit umd stable pstate */ + int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter); }; /* @@ -723,6 +726,45 @@ struct amd_powerplay { const struct amd_pm_funcs *pp_funcs; }; +/* polaris10 kickers */ +#define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \ + ((rid == 0xE3) || \ + (rid == 0xE4) || \ + (rid == 0xE5) || \ + (rid == 0xE7) || \ + (rid == 0xEF))) || \ + ((did == 0x6FDF) && \ + ((rid == 0xE7) || \ + (rid == 0xEF) || \ + (rid == 0xFF)))) + +#define ASICID_IS_P30(did, rid) ((did == 0x67DF) && \ + ((rid == 0xE1) || \ + (rid == 0xF7))) + +/* polaris11 kickers */ +#define ASICID_IS_P21(did, rid) (((did == 0x67EF) && \ + ((rid == 0xE0) || \ + (rid == 0xE5))) || \ + ((did == 0x67FF) && \ + ((rid == 0xCF) || \ + (rid == 0xEF) || \ + (rid == 0xFF)))) + +#define ASICID_IS_P31(did, rid) ((did == 0x67EF) && \ + ((rid == 
0xE2))) + +/* polaris12 kickers */ +#define ASICID_IS_P23(did, rid) (((did == 0x6987) && \ + ((rid == 0xC0) || \ + (rid == 0xC1) || \ + (rid == 0xC3) || \ + (rid == 0xC7))) || \ + ((did == 0x6981) && \ + ((rid == 0x00) || \ + (rid == 0x01) || \ + (rid == 0x10)))) + #define AMDGPU_RESET_MAGIC_NUM 64 #define AMDGPU_MAX_DF_PERFMONS 4 struct amdgpu_device { @@ -879,6 +921,9 @@ struct amdgpu_device { /* nbio */ struct amdgpu_nbio nbio; + /* smuio */ + struct amdgpu_smuio smuio; + /* mmhub */ struct amdgpu_mmhub mmhub; @@ -1165,6 +1210,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) #define amdgpu_asic_supports_baco(adev) (adev)->asic_funcs->supports_baco((adev)) #define amdgpu_asic_pre_asic_init(adev) (adev)->asic_funcs->pre_asic_init((adev)) +#define amdgpu_asic_update_umd_stable_pstate(adev, enter) \ + ((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0) #define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); @@ -1264,9 +1311,11 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, struct amdgpu_dm_backlight_caps *caps); +bool amdgpu_acpi_is_s0ix_supported(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } +static inline bool amdgpu_acpi_is_s0ix_supported(void) { return false; } #endif int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, @@ -1294,19 +1343,6 @@ bool amdgpu_device_load_pci_state(struct pci_dev *pdev); #include "amdgpu_object.h" -/* used by df_v3_6.c and amdgpu_pmu.c */ -#define AMDGPU_PMU_ATTR(_name, _object) \ -static ssize_t \ -_name##_show(struct device *dev, \ - struct device_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1); \ - return sprintf(page, _object "\n"); \ -} \ - \ -static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name) - static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) { return adev->gmc.tmz_enabled; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index d3e51d361179..b8655ff73a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -36,17 +36,17 @@ #include "acp_gfx_if.h" -#define ACP_TILE_ON_MASK 0x03 -#define ACP_TILE_OFF_MASK 0x02 -#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f -#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20 +#define ACP_TILE_ON_MASK 0x03 +#define ACP_TILE_OFF_MASK 0x02 +#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f +#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20 -#define ACP_TILE_P1_MASK 0x3e -#define ACP_TILE_P2_MASK 0x3d -#define ACP_TILE_DSP0_MASK 0x3b -#define ACP_TILE_DSP1_MASK 0x37 +#define ACP_TILE_P1_MASK 0x3e +#define ACP_TILE_P2_MASK 0x3d +#define ACP_TILE_DSP0_MASK 0x3b +#define ACP_TILE_DSP1_MASK 0x37 -#define ACP_TILE_DSP2_MASK 0x2f +#define ACP_TILE_DSP2_MASK 0x2f #define ACP_DMA_REGS_END 0x146c0 #define ACP_I2S_PLAY_REGS_START 0x14840 @@ -75,8 +75,8 @@ #define mmACP_CONTROL 0x5131 #define mmACP_STATUS 0x5133 #define mmACP_SOFT_RESET 0x5134 -#define ACP_CONTROL__ClkEn_MASK 0x1 -#define ACP_SOFT_RESET__SoftResetAud_MASK 0x100 +#define ACP_CONTROL__ClkEn_MASK 0x1 +#define ACP_SOFT_RESET__SoftResetAud_MASK 0x100 #define ACP_SOFT_RESET__SoftResetAudDone_MASK 0x1000000 #define ACP_CLOCK_EN_TIME_OUT_VALUE 0x000000FF 
#define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE 0x000000FF @@ -176,7 +176,7 @@ static struct device *get_mfd_cell_dev(const char *device_name, int r) /** * acp_hw_init - start and test ACP block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * */ static int acp_hw_init(void *handle) @@ -405,7 +405,7 @@ failure: /** * acp_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * */ static int acp_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 165b02e267b0..4f4fda53c08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -27,6 +27,7 @@ #include <linux/power_supply.h> #include <linux/pm_runtime.h> #include <acpi/video.h> +#include <acpi/actbl.h> #include <drm/drm_crtc_helper.h> #include "amdgpu.h" @@ -894,3 +895,16 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev) unregister_acpi_notifier(&adev->acpi_nb); kfree(adev->atif); } + +/** + * amdgpu_acpi_is_s0ix_supported + * + * returns true if supported, false if not. + */ +bool amdgpu_acpi_is_s0ix_supported(void) +{ + if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) + return true; + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0544460653b9..db96d69eb45e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -390,23 +390,17 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : - ~((1ULL << 32) - 1); - resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; memset(mem_info, 0, sizeof(*mem_info)); - if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { - mem_info->local_mem_size_public = adev->gmc.visible_vram_size; - mem_info->local_mem_size_private = adev->gmc.real_vram_size - - adev->gmc.visible_vram_size; - } else { - mem_info->local_mem_size_public = 0; - mem_info->local_mem_size_private = adev->gmc.real_vram_size; - } + + mem_info->local_mem_size_public = adev->gmc.visible_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size - + adev->gmc.visible_vram_size; + mem_info->vram_width = adev->gmc.vram_width; - pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n", - &adev->gmc.aper_base, &aper_limit, + pr_debug("Address base: %pap public 0x%llx private 0x%llx\n", + &adev->gmc.aper_base, mem_info->local_mem_size_public, mem_info->local_mem_size_private); @@ -648,6 +642,13 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + /* Temp workaround to fix the soft hang observed in certain compute + * applications if GFXOFF is enabled. + */ + if (adev->asic_type == CHIP_SIENNA_CICHLID) { + pr_debug("GFXOFF is %s\n", idle ? 
"enabled" : "disabled"); + amdgpu_gfx_off_ctrl(adev, idle); + } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 1afa8f122e7d..604757a1e440 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -304,4 +304,5 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 43b18863a8b8..b43e68fc1378 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -37,6 +37,7 @@ #include "soc15.h" #include "soc15d.h" #include "gfx_v9_0.h" +#include "amdgpu_amdkfd_gfx_v9.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -799,7 +800,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * * Reading registers referenced above involves programming GRBM appropriately */ -static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu) { int qidx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index fc8934b86d93..e64deba8900f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -63,3 +63,5 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); +void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, + int *pasid_wave_cnt, int *max_waves_per_cu); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 5da487b64a66..7791d074bd32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -239,8 +239,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, if (!old) return 0; - new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]), - GFP_KERNEL); + new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL); if (!new) return -ENOMEM; @@ -1115,19 +1114,19 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; if (WARN_ON(!kgd || !vm)) - return; + return; - pr_debug("Releasing process vm %p\n", vm); + pr_debug("Releasing process vm %p\n", vm); - /* The original pasid of amdgpu vm has already been - * released during making a amdgpu vm to a compute vm - * The current pasid is managed by kfd and will be - * released on kfd process destroy. Set amdgpu pasid - * to 0 to avoid duplicate release. - */ + /* The original pasid of amdgpu vm has already been + * released during making a amdgpu vm to a compute vm + * The current pasid is managed by kfd and will be + * released on kfd process destroy. Set amdgpu pasid + * to 0 to avoid duplicate release. 
+ */ amdgpu_vm_release_compute(adev, avm); } @@ -1288,7 +1287,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct ttm_validate_buffer *bo_list_entry; unsigned int mapped_to_gpu_memory; int ret; - bool is_imported = 0; + bool is_imported = false; mutex_lock(&mem->lock); mapped_to_gpu_memory = mem->mapped_to_gpu_memory; @@ -1479,7 +1478,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) amdgpu_bo_fence(bo, &avm->process_info->eviction_fence->base, true); @@ -1558,7 +1557,8 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( * required. */ if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) + !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && + !mem->bo->tbo.pin_count) amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence); @@ -1694,7 +1694,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); - + (*mem)->alloc_flags = ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT) @@ -2043,6 +2043,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) int ret = 0, i; struct list_head duplicate_save; struct amdgpu_sync sync_obj; + unsigned long failed_size = 0; + unsigned long total_size = 0; INIT_LIST_HEAD(&duplicate_save); INIT_LIST_HEAD(&ctx.list); @@ -2099,10 +2101,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) uint32_t domain = mem->domain; struct kfd_bo_va_list *bo_va_entry; + total_size += amdgpu_bo_size(bo); + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); if (ret) { - pr_debug("Memory eviction: Validate BOs failed. 
Try again\n"); - goto validate_map_fail; + pr_debug("Memory eviction: Validate BOs failed\n"); + failed_size += amdgpu_bo_size(bo); + ret = amdgpu_amdkfd_bo_validate(bo, + AMDGPU_GEM_DOMAIN_GTT, false); + if (ret) { + pr_debug("Memory eviction: Try again\n"); + goto validate_map_fail; + } } ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); if (ret) { @@ -2122,6 +2132,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } } + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + /* Update page directories */ ret = process_update_pds(process_info, &sync_obj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 469352e2d6ec..86add0f4ea4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1401,7 +1401,7 @@ static ATOM_VOLTAGE_OBJECT_V3 *amdgpu_atombios_lookup_voltage_object_v3(ATOM_VOL { u32 size = le16_to_cpu(v3->sHeader.usStructureSize); u32 offset = offsetof(ATOM_VOLTAGE_OBJECT_INFO_V3_1, asVoltageObj[0]); - u8 *start = (u8*)v3; + u8 *start = (u8 *)v3; while (offset < size) { ATOM_VOLTAGE_OBJECT_V3 *vo = (ATOM_VOLTAGE_OBJECT_V3 *)(start + offset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index b4df6460e45a..306077884a67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -70,7 +70,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) struct atom_context *ctx = adev->mode_info.atom_context; int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_usagebyfirmware); - struct vram_usagebyfirmware_v2_1 * firmware_usage; + struct vram_usagebyfirmware_v2_1 *firmware_usage; uint32_t start_addr, size; uint16_t data_offset; int usage_bytes = 0; @@ -149,6 +149,10 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, case LpDdr4MemType: vram_type = AMDGPU_VRAM_TYPE_DDR4; break; + case Ddr5MemType: + case LpDdr5MemType: + vram_type = AMDGPU_VRAM_TYPE_DDR5; + break; default: vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; break; @@ -544,6 +548,7 @@ int amdgpu_mem_train_support(struct amdgpu_device *adev) case HW_REV(11, 0, 5): case HW_REV(11, 0, 7): case HW_REV(11, 0, 11): + case HW_REV(11, 0, 12): ret = 1; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 78ac6dbe70d8..f1a050379190 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -352,17 +352,10 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, break; case CHIP_POLARIS11: if (type == CGS_UCODE_ID_SMU) { - if (((adev->pdev->device == 0x67ef) && - ((adev->pdev->revision == 0xe0) || - (adev->pdev->revision == 0xe5))) || - ((adev->pdev->device == 0x67ff) && - ((adev->pdev->revision == 0xcf) || - (adev->pdev->revision == 0xef) || - (adev->pdev->revision == 0xff)))) { + if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); - } else if ((adev->pdev->device == 0x67ef) && - (adev->pdev->revision == 0xe2)) { + } else if (ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); } else { @@ -374,21 +367,10 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, 
break; case CHIP_POLARIS10: if (type == CGS_UCODE_ID_SMU) { - if (((adev->pdev->device == 0x67df) && - ((adev->pdev->revision == 0xe0) || - (adev->pdev->revision == 0xe3) || - (adev->pdev->revision == 0xe4) || - (adev->pdev->revision == 0xe5) || - (adev->pdev->revision == 0xe7) || - (adev->pdev->revision == 0xef))) || - ((adev->pdev->device == 0x6fdf) && - ((adev->pdev->revision == 0xef) || - (adev->pdev->revision == 0xff)))) { + if (ASICID_IS_P20(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); - } else if ((adev->pdev->device == 0x67df) && - ((adev->pdev->revision == 0xe1) || - (adev->pdev->revision == 0xf7))) { + } else if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); } else { @@ -399,13 +381,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, } break; case CHIP_POLARIS12: - if (((adev->pdev->device == 0x6987) && - ((adev->pdev->revision == 0xc0) || - (adev->pdev->revision == 0xc3))) || - ((adev->pdev->device == 0x6981) && - ((adev->pdev->revision == 0x00) || - (adev->pdev->revision == 0x01) || - (adev->pdev->revision == 0x10)))) { + if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 12598a4b5c78..594a0108e90f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -326,7 +326,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, increment_us = time_us - adev->mm_stats.last_update_us; adev->mm_stats.last_update_us = time_us; adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us, - us_upper_bound); + us_upper_bound); /* This prevents the short period of low performance when the VRAM * usage is low and the driver is in debt or doesn't have enough @@ -404,13 +404,12 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .resv = bo->tbo.base.resv, - .flags = 0 + .resv = bo->tbo.base.resv }; uint32_t domain; int r; - if (bo->pin_count) + if (bo->tbo.pin_count) return 0; /* Don't move this buffer if we have depleted our allowance @@ -677,6 +676,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. * @error: error number + * @backoff: indicator to backoff the reservation * * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. @@ -1461,7 +1461,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, dma_fence_put(fence); if (r) return r; - r = drm_syncobj_get_fd(syncobj, (int*)&info->out.handle); + r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle); drm_syncobj_put(syncobj); return r; @@ -1645,6 +1645,7 @@ err_free_fences: * @parser: command submission parser context * @addr: VM address * @bo: resulting BO of the mapping found + * @map: Placeholder to return found BO mapping * * Search the buffer objects in the command submission context for a certain * virtual memory address. 
Returns allocation structure when found, NULL diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 08047bc4d588..da21e60bb827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -37,10 +37,9 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, u32 domain, uint32_t size) { - int r; void *ptr; - r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, domain, bo, NULL, &ptr); if (!*bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c80d8339f58c..0350205c4897 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -100,7 +100,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, enum drm_sched_priority priority; int r; - entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]), + entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs), GFP_KERNEL); if (!entity) return -ENOMEM; @@ -450,7 +450,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct drm_sched_entity *entity, - struct dma_fence *fence, uint64_t* handle) + struct dma_fence *fence, uint64_t *handle) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); uint64_t seq = centity->sequence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 2d125b8b15ee..a6667a2ca0db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -35,6 +35,7 @@ #include "amdgpu_dm_debugfs.h" #include "amdgpu_ras.h" #include "amdgpu_rap.h" +#include "amdgpu_fw_attestation.h" /** * amdgpu_debugfs_add_files - Add simple debugfs entries @@ -169,14 +170,14 @@ static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev) * * Bit 62: Indicates a GRBM bank switch is needed * Bit 61: Indicates a SRBM bank switch is needed (implies bit 62 is - * zero) + * zero) * Bits 24..33: The SE or ME selector if needed * Bits 34..43: The SH (or SA) or PIPE selector if needed * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed * * Bit 23: Indicates that the PM power gating lock should be held - * This is necessary to read registers that might be - * unreliable during a power gating transistion. + * This is necessary to read registers that might be + * unreliable during a power gating transistion. * * The lower bits are the BYTE offset of the register to read. 
This * allows reading multiple registers in a single call and having @@ -299,7 +300,7 @@ end: return result; } -/** +/* * amdgpu_debugfs_regs_read - Callback for reading MMIO registers */ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, @@ -308,7 +309,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); } -/** +/* * amdgpu_debugfs_regs_write - Callback for writing MMIO registers */ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, @@ -864,7 +865,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, { struct amdgpu_device *adev = f->f_inode->i_private; int r, x; - ssize_t result=0; + ssize_t result = 0; uint32_t offset, se, sh, cu, wave, simd, data[32]; if (size & 3 || *pos & 3) @@ -1210,7 +1211,7 @@ static const char *debugfs_regs_names[] = { /** * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide - * register access. + * register access. * * @adev: The device to attach the debugfs entries to */ @@ -1319,6 +1320,7 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = drm_to_adev(dev); + struct ttm_resource_manager *man; int r; r = pm_runtime_get_sync(dev->dev); @@ -1327,7 +1329,9 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) return r; } - seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + r = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + seq_printf(m, "(%d)\n", r); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -1335,11 +1339,41 @@ static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) return 0; } +static int amdgpu_debugfs_vm_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct drm_file *file; + int r; + + r = mutex_lock_interruptible(&dev->filelist_mutex); + if (r) + return r; + + list_for_each_entry(file, &dev->filelist, lhead) { + struct amdgpu_fpriv *fpriv = file->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + + seq_printf(m, "pid:%d\tProcess:%s ----------\n", + vm->task_info.pid, vm->task_info.process_name); + r = amdgpu_bo_reserve(vm->root.base.bo, true); + if (r) + break; + amdgpu_debugfs_vm_bo_info(vm, m); + amdgpu_bo_unreserve(vm->root.base.bo); + } + + mutex_unlock(&dev->filelist_mutex); + + return r; +} + static const struct drm_info_list amdgpu_debugfs_list[] = { {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump}, {"amdgpu_test_ib", &amdgpu_debugfs_test_ib}, {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}, {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt}, + {"amdgpu_vm_info", &amdgpu_debugfs_vm_info}, }; static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring, @@ -1635,6 +1669,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_rap_debugfs_init(adev); + amdgpu_fw_attestation_debugfs_init(adev); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list, ARRAY_SIZE(amdgpu_debugfs_list)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 026789b466db..79dd85f71fab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,6 +80,7 @@ 
MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -115,6 +116,8 @@ const char *amdgpu_asic_name[] = { "NAVI12", "SIENNA_CICHLID", "NAVY_FLOUNDER", + "VANGOGH", + "DIMGREY_CAVEFISH", "LAST", }; @@ -582,6 +585,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) * @adev: amdgpu_device pointer * @pcie_index: mmio register offset * @pcie_data: mmio register offset + * @reg_addr: indirect register address to read from * * Returns the value of indirect register @reg_addr */ @@ -612,6 +616,7 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @pcie_index: mmio register offset * @pcie_data: mmio register offset + * @reg_addr: indirect register address to read from * * Returns the value of indirect register @reg_addr */ @@ -1373,13 +1378,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_gmc_tmz_set(adev); - if (amdgpu_num_kcq == -1) { - amdgpu_num_kcq = 8; - } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { - amdgpu_num_kcq = 8; - dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n"); - } - amdgpu_gmc_noretry_set(adev); return 0; @@ -1786,6 +1784,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_VEGA20: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: default: return 0; case CHIP_VEGA10: @@ -1820,6 +1819,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; + case CHIP_VANGOGH: + chip_name = "vangogh"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1994,7 +1996,12 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: - adev->family = AMDGPU_FAMILY_NV; + case CHIP_DIMGREY_CAVEFISH: + case CHIP_VANGOGH: + if (adev->asic_type == CHIP_VANGOGH) + adev->family = AMDGPU_FAMILY_VGH; + else + adev->family = AMDGPU_FAMILY_NV; r = nv_set_ip_blocks(adev); if (r) @@ -2643,8 +2650,10 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + if (!amdgpu_acpi_is_s0ix_supported() || amdgpu_in_reset(adev)) { + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + } for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) @@ -2999,10 +3008,10 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_NAVI14: case CHIP_NAVI12: case CHIP_RENOIR: -#endif -#if defined(CONFIG_DRM_AMD_DC_DCN3_0) case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: + case CHIP_VANGOGH: #endif return amdgpu_dc != 0; #endif @@ -3337,7 +3346,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ - vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); + if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + 
vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); if (amdgpu_device_supports_boco(ddev)) boco = true; @@ -3596,7 +3606,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) vga_switcheroo_unregister_client(adev->pdev); if (amdgpu_device_supports_boco(adev_to_drm(adev))) vga_switcheroo_fini_domain_pm_ops(adev->dev); - vga_client_register(adev->pdev, NULL, NULL, NULL); + if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vga_client_register(adev->pdev, NULL, NULL, NULL); if (adev->rio_mem) pci_iounmap(adev->pdev, adev->rio_mem); adev->rio_mem = NULL; @@ -3699,8 +3710,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_fence_driver_suspend(adev); - r = amdgpu_device_ip_suspend_phase2(adev); - + if (!amdgpu_acpi_is_s0ix_supported() || amdgpu_in_reset(adev)) + r = amdgpu_device_ip_suspend_phase2(adev); + else + amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry); /* evict remaining vram memory * This second call to evict vram is to evict the gart page table * using the CPU. @@ -3731,6 +3744,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + if (amdgpu_acpi_is_s0ix_supported()) + amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry); + /* post card */ if (amdgpu_device_need_post(adev)) { r = amdgpu_device_asic_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 373cdebe0e2f..52488bb45112 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -44,11 +44,11 @@ struct amdgpu_df_funcs { void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, - int is_add); + int counter_idx, int is_add); int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config, - int is_remove); + int counter_idx, int is_remove); void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, - uint64_t *count); + int counter_idx, uint64_t *count); uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, uint32_t ficadl_val, uint32_t ficadh_val); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index bfb95143ba5e..b2dbcb4df020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -128,6 +128,7 @@ static int hw_id_map[MAX_HWIP] = { [NBIF_HWIP] = NBIF_HWID, [THM_HWIP] = THM_HWID, [CLK_HWIP] = CLKA_HWID, + [UMC_HWIP] = UMC_HWID, }; static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 7cc7af2a6822..f764803c53a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -38,6 +38,7 @@ #include <drm/drm_edid.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> static void amdgpu_display_flip_callback(struct dma_fence *f, @@ -132,10 +133,7 @@ static void amdgpu_display_unpin_work_func(struct work_struct *__work) /* unpin of the old buffer */ r = amdgpu_bo_reserve(work->old_abo, true); if (likely(r == 0)) { - r = amdgpu_bo_unpin(work->old_abo); - if (unlikely(r != 0)) { - DRM_ERROR("failed to unpin buffer after flip\n"); - } + 
amdgpu_bo_unpin(work->old_abo); amdgpu_bo_unreserve(work->old_abo); } else DRM_ERROR("failed to reserve buffer after flip\n"); @@ -249,8 +247,7 @@ pflip_cleanup: } unpin: if (!adev->enable_virtual_display) - if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) - DRM_ERROR("failed to unpin new abo in error path\n"); + amdgpu_bo_unpin(new_abo); unreserve: amdgpu_bo_unreserve(new_abo); @@ -444,10 +441,6 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) drm_connector_list_iter_end(&iter); } -/** - * amdgpu_display_ddc_probe - * - */ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux) { @@ -512,7 +505,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * to avoid hang caused by placement of scanout BO in GTT on certain * APUs. So force the BO placement to VRAM in case this architecture * will not allow USWC mappings. - * Also, don't allow GTT domain if the BO doens't have USWC falg set. + * Also, don't allow GTT domain if the BO doesn't have USWC flag set. */ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && @@ -528,6 +521,11 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, (adev->apu_flags & AMD_APU_IS_PICASSO)) domain |= AMDGPU_GEM_DOMAIN_GTT; break; + case CHIP_RENOIR: + case CHIP_VANGOGH: + domain |= AMDGPU_GEM_DOMAIN_GTT; + break; + default: break; } @@ -537,20 +535,390 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, return domain; } +static const struct drm_format_info dcc_formats[] = { + { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 2, + .cpp = { 2, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +}; + +static const struct drm_format_info dcc_retile_formats[] = { + { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, 
.block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 3, + .cpp = { 2, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +}; + +static const struct drm_format_info * +lookup_format_info(const struct drm_format_info formats[], + int num_formats, u32 format) +{ + int i; + + for (i = 0; i < num_formats; i++) { + if (formats[i].format == format) + return &formats[i]; + } + + return NULL; +} + +const struct drm_format_info * +amdgpu_lookup_format_info(u32 format, uint64_t modifier) +{ + if (!IS_AMD_FMT_MOD(modifier)) + return NULL; + + if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) + return lookup_format_info(dcc_retile_formats, + ARRAY_SIZE(dcc_retile_formats), + format); + + if (AMD_FMT_MOD_GET(DCC, modifier)) + return lookup_format_info(dcc_formats, ARRAY_SIZE(dcc_formats), + format); + + /* returning NULL will cause the default format structs to be used. */ + return NULL; +} + + +/* + * Tries to extract the renderable DCC offset from the opaque metadata attached + * to the buffer. + */ +static int +extract_render_dcc_offset(struct amdgpu_device *adev, + struct drm_gem_object *obj, + uint64_t *offset) +{ + struct amdgpu_bo *rbo; + int r = 0; + uint32_t metadata[10]; /* Something that fits a descriptor + header. */ + uint32_t size; + + rbo = gem_to_amdgpu_bo(obj); + r = amdgpu_bo_reserve(rbo, false); + + if (unlikely(r)) { + /* Don't show error message when returning -ERESTARTSYS */ + if (r != -ERESTARTSYS) + DRM_ERROR("Unable to reserve buffer: %d\n", r); + return r; + } + + r = amdgpu_bo_get_metadata(rbo, metadata, sizeof(metadata), &size, NULL); + amdgpu_bo_unreserve(rbo); + + if (r) + return r; + + /* + * The first word is the metadata version, and we need space for at least + * the version + pci vendor+device id + 8 words for a descriptor. 
+ */ + if (size < 40 || metadata[0] != 1) + return -EINVAL; + + if (adev->family >= AMDGPU_FAMILY_NV) { + /* resource word 6/7 META_DATA_ADDRESS{_LO} */ + *offset = ((u64)metadata[9] << 16u) | + ((metadata[8] & 0xFF000000u) >> 16); + } else { + /* resource word 5/7 META_DATA_ADDRESS */ + *offset = ((u64)metadata[9] << 8u) | + ((u64)(metadata[7] & 0x1FE0000u) << 23); + } + + return 0; +} + +static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) +{ + struct amdgpu_device *adev = drm_to_adev(afb->base.dev); + uint64_t modifier = 0; + + if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) { + modifier = DRM_FORMAT_MOD_LINEAR; + } else { + int swizzle = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE); + bool has_xor = swizzle >= 16; + int block_size_bits; + int version; + int pipe_xor_bits = 0; + int bank_xor_bits = 0; + int packers = 0; + int rb = 0; + int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B); + + switch (swizzle >> 2) { + case 0: /* 256B */ + block_size_bits = 8; + break; + case 1: /* 4KiB */ + case 5: /* 4KiB _X */ + block_size_bits = 12; + break; + case 2: /* 64KiB */ + case 4: /* 64 KiB _T */ + case 6: /* 64 KiB _X */ + block_size_bits = 16; + break; + default: + /* RESERVED or VAR */ + return -EINVAL; + } + + if (adev->asic_type >= CHIP_SIENNA_CICHLID) + version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; + else if (adev->family == AMDGPU_FAMILY_NV) + version = AMD_FMT_MOD_TILE_VER_GFX10; + else + version = AMD_FMT_MOD_TILE_VER_GFX9; + + switch (swizzle & 3) { + case 0: /* Z microtiling */ + return -EINVAL; + case 1: /* S microtiling */ + if (!has_xor) + version = AMD_FMT_MOD_TILE_VER_GFX9; + break; + case 2: + if (!has_xor && afb->base.format->cpp[0] != 4) + version = AMD_FMT_MOD_TILE_VER_GFX9; + break; + case 3: + break; + } + + if (has_xor) { + switch (version) { + case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: + pipe_xor_bits = min(block_size_bits - 8, pipes); + packers = min(block_size_bits - 8 - pipe_xor_bits, + ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs)); + break; + case AMD_FMT_MOD_TILE_VER_GFX10: + pipe_xor_bits = min(block_size_bits - 8, pipes); + break; + case AMD_FMT_MOD_TILE_VER_GFX9: + rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + + ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); + pipe_xor_bits = min(block_size_bits - 8, pipes + + ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); + bank_xor_bits = min(block_size_bits - 8 - pipe_xor_bits, + ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); + break; + } + } + + modifier = AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) | + AMD_FMT_MOD_SET(TILE_VERSION, version) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, packers); + + if (dcc_offset != 0) { + bool dcc_i64b = AMDGPU_TILING_GET(afb->tiling_flags, DCC_INDEPENDENT_64B) != 0; + bool dcc_i128b = version >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; + const struct drm_format_info *format_info; + u64 render_dcc_offset; + + /* Enable constant encode on RAVEN2 and later. */ + bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN || + (adev->asic_type == CHIP_RAVEN && + adev->external_rev_id >= 0x81); + + int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B : + dcc_i128b ? 
AMD_FMT_MOD_DCC_BLOCK_128B : + AMD_FMT_MOD_DCC_BLOCK_256B; + + modifier |= AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, dcc_constant_encode) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, dcc_i64b) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, dcc_i128b) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_cblock_size); + + afb->base.offsets[1] = dcc_offset * 256 + afb->base.offsets[0]; + afb->base.pitches[1] = + AMDGPU_TILING_GET(afb->tiling_flags, DCC_PITCH_MAX) + 1; + + /* + * If the userspace driver uses retiling the tiling flags do not contain + * info on the renderable DCC buffer. Luckily the opaque metadata contains + * the info so we can try to extract it. The kernel does not use this info + * but we should convert it to a modifier plane for getfb2, so the + * userspace driver that gets it doesn't have to juggle around another DCC + * plane internally. + */ + if (extract_render_dcc_offset(adev, afb->base.obj[0], + &render_dcc_offset) == 0 && + render_dcc_offset != 0 && + render_dcc_offset != afb->base.offsets[1] && + render_dcc_offset < UINT_MAX) { + uint32_t dcc_block_bits; /* of base surface data */ + + modifier |= AMD_FMT_MOD_SET(DCC_RETILE, 1); + afb->base.offsets[2] = render_dcc_offset; + + if (adev->family >= AMDGPU_FAMILY_NV) { + int extra_pipe = 0; + + if (adev->asic_type >= CHIP_SIENNA_CICHLID && + pipes == packers && pipes > 1) + extra_pipe = 1; + + dcc_block_bits = max(20, 16 + pipes + extra_pipe); + } else { + modifier |= AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes); + dcc_block_bits = max(20, 18 + rb); + } + + dcc_block_bits -= ilog2(afb->base.format->cpp[0]); + afb->base.pitches[2] = ALIGN(afb->base.width, + 1u << ((dcc_block_bits + 1) / 2)); + } + format_info = amdgpu_lookup_format_info(afb->base.format->format, + modifier); + if (!format_info) + return -EINVAL; + + afb->base.format = format_info; + } + } + + afb->base.modifier = modifier; + afb->base.flags |= DRM_MODE_FB_MODIFIERS; + return 0; +} + +static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, + uint64_t *tiling_flags, bool *tmz_surface) +{ + struct amdgpu_bo *rbo; + int r; + + if (!amdgpu_fb) { + *tiling_flags = 0; + *tmz_surface = false; + return 0; + } + + rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]); + r = amdgpu_bo_reserve(rbo, false); + + if (unlikely(r)) { + /* Don't show error message when returning -ERESTARTSYS */ + if (r != -ERESTARTSYS) + DRM_ERROR("Unable to reserve buffer: %d\n", r); + return r; + } + + if (tiling_flags) + amdgpu_bo_get_tiling_flags(rbo, tiling_flags); + + if (tmz_surface) + *tmz_surface = amdgpu_bo_encrypted(rbo); + + amdgpu_bo_unreserve(rbo); + + return r; +} + int amdgpu_display_framebuffer_init(struct drm_device *dev, struct amdgpu_framebuffer *rfb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { - int ret; + int ret, i; rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); - if (ret) { - rfb->base.obj[0] = NULL; - return ret; + if (ret) + goto fail; + + /* + * This needs to happen before modifier conversion as that might change + * the number of planes. + */ + for (i = 1; i < rfb->base.format->num_planes; ++i) { + if (mode_cmd->handles[i] != mode_cmd->handles[0]) { + drm_dbg_kms(dev, "Plane 0 and %d have different BOs: %u vs. 
%u\n", + i, mode_cmd->handles[0], mode_cmd->handles[i]); + ret = -EINVAL; + goto fail; + } + } + + ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface); + if (ret) + goto fail; + + if (dev->mode_config.allow_fb_modifiers && + !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) { + ret = convert_tiling_flags_to_modifier(rfb); + if (ret) { + drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier", + rfb->tiling_flags); + goto fail; + } + } + + for (i = 1; i < rfb->base.format->num_planes; ++i) { + rfb->base.obj[i] = rfb->base.obj[0]; + drm_gem_object_get(rfb->base.obj[i]); } + return 0; + +fail: + rfb->base.obj[0] = NULL; + return ret; } struct drm_framebuffer * @@ -564,14 +932,14 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); if (obj == NULL) { - dev_err(&dev->pdev->dev, "No GEM object associated to handle 0x%08X, " - "can't create framebuffer\n", mode_cmd->handles[0]); + drm_dbg_kms(dev, "No GEM object associated to handle 0x%08X, " + "can't create framebuffer\n", mode_cmd->handles[0]); return ERR_PTR(-ENOENT); } /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ if (obj->import_attach) { - DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n"); + drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 3620b24785e1..dc7b7d116549 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -44,5 +44,7 @@ struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd); +const struct drm_format_info * +amdgpu_lookup_format_info(u32 format, uint64_t modifier); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 1b56dbc1f304..e42175e1acf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,42 +42,6 @@ #include <linux/pci-p2pdma.h> /** - * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation - * @obj: GEM BO - * - * Sets up an in-kernel virtual mapping of the BO's memory. - * - * Returns: - * The virtual address of the mapping or an error pointer. - */ -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int ret; - - ret = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, - &bo->dma_buf_vmap); - if (ret) - return ERR_PTR(ret); - - return bo->dma_buf_vmap.virtual; -} - -/** - * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation - * @obj: GEM BO - * @vaddr: Virtual address (unused) - * - * Tears down the in-kernel virtual mapping of the BO's memory. 
- */ -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - - ttm_bo_kunmap(&bo->dma_buf_vmap); -} - -/** * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation * @obj: GEM BO * @vma: Virtual memory area @@ -281,7 +245,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach, struct sg_table *sgt; long r; - if (!bo->pin_count) { + if (!bo->tbo.pin_count) { /* move buffer into GTT or VRAM */ struct ttm_operation_ctx ctx = { false, false }; unsigned domains = AMDGPU_GEM_DOMAIN_GTT; @@ -390,7 +354,8 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf, if (unlikely(ret != 0)) return ret; - if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { + if (!bo->tbo.pin_count && + (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h index 2c5c84a06bb9..39b5b9616fd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h @@ -31,8 +31,6 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev, struct amdgpu_bo *bo); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8e988f07f085..ebdab31f9de9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -40,6 +40,7 @@ #include "amdgpu.h" #include "amdgpu_irq.h" #include "amdgpu_dma_buf.h" +#include "amdgpu_sched.h" #include "amdgpu_amdkfd.h" @@ -94,16 +95,16 @@ #define KMS_DRIVER_MINOR 40 #define KMS_DRIVER_PATCHLEVEL 0 -int amdgpu_vram_limit = 0; -int amdgpu_vis_vram_limit = 0; +int amdgpu_vram_limit; +int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ int amdgpu_moverate = -1; /* auto */ -int amdgpu_benchmarking = 0; -int amdgpu_testing = 0; +int amdgpu_benchmarking; +int amdgpu_testing; int amdgpu_audio = -1; -int amdgpu_disp_priority = 0; -int amdgpu_hw_i2c = 0; +int amdgpu_disp_priority; +int amdgpu_hw_i2c; int amdgpu_pcie_gen2 = -1; int amdgpu_msi = -1; char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENGTH]; @@ -113,19 +114,19 @@ int amdgpu_aspm = -1; int amdgpu_runtime_pm = -1; uint amdgpu_ip_block_mask = 0xffffffff; int amdgpu_bapm = -1; -int amdgpu_deep_color = 0; +int amdgpu_deep_color; int amdgpu_vm_size = -1; int amdgpu_vm_fragment_size = -1; int amdgpu_vm_block_size = -1; -int amdgpu_vm_fault_stop = 0; -int amdgpu_vm_debug = 0; +int amdgpu_vm_fault_stop; +int amdgpu_vm_debug; int amdgpu_vm_update_mode = -1; -int amdgpu_exp_hw_support = 0; +int amdgpu_exp_hw_support; int amdgpu_dc = -1; int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; -uint amdgpu_pcie_gen_cap = 0; -uint amdgpu_pcie_lane_cap = 0; +uint amdgpu_pcie_gen_cap; +uint amdgpu_pcie_lane_cap; uint amdgpu_cg_mask = 0xffffffff; uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; @@ -133,23 +134,31 @@ char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; /* OverDrive(bit 14) 
disabled by default*/ uint amdgpu_pp_feature_mask = 0xffffbfff; -uint amdgpu_force_long_training = 0; -int amdgpu_job_hang_limit = 0; +uint amdgpu_force_long_training; +int amdgpu_job_hang_limit; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ -int amdgpu_emu_mode = 0; -uint amdgpu_smu_memory_pool_size = 0; -/* FBC (bit 0) disabled by default*/ -uint amdgpu_dc_feature_mask = 0; -uint amdgpu_dc_debug_mask = 0; +int amdgpu_emu_mode; +uint amdgpu_smu_memory_pool_size; +/* + * FBC (bit 0) disabled by default + * MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default + * - With this, for multiple monitors in sync (e.g. with the same model), + * mclk switching will be allowed. And the mclk will not be forced to the + * highest. That helps save some idle power. + * DISABLE_FRACTIONAL_PWM (bit 2) disabled by default + * PSR (bit 3) disabled by default + */ +uint amdgpu_dc_feature_mask = 2; +uint amdgpu_dc_debug_mask; int amdgpu_async_gfx_ring = 1; -int amdgpu_mcbp = 0; +int amdgpu_mcbp; int amdgpu_discovery = -1; -int amdgpu_mes = 0; +int amdgpu_mes; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; -int amdgpu_tmz = 0; +int amdgpu_tmz; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; @@ -271,7 +280,7 @@ module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_ /** * DOC: dpm (int) * Override for dynamic power management setting - * (0 = disable, 1 = enable, 2 = enable sw smu driver for vega20) + * (0 = disable, 1 = enable) * The default is -1 (auto). */ MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)"); @@ -296,7 +305,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -764,7 +773,7 @@ module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); * Defaults to 0, or disabled. Userspace can still override this level later * after boot. 
*/ -uint amdgpu_dm_abm_level = 0; +uint amdgpu_dm_abm_level; MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); @@ -782,7 +791,7 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ -MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); module_param_named(reset_method, amdgpu_reset_method, int, 0444); /** @@ -1089,12 +1098,27 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + /* Van Gogh */ + {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + + /* Navy_Flounder */ + {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + + /* DIMGREY_CAVEFISH */ + {0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); -static struct drm_driver kms_driver; +static const struct drm_driver amdgpu_kms_driver; static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -1165,7 +1189,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) return ret; - adev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*adev), ddev); + adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) return PTR_ERR(adev); @@ -1495,7 +1519,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) { - struct drm_file *file; + struct drm_file *file; if (!filp) return -EINVAL; @@ -1509,7 +1533,29 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) return 0; } -static struct drm_driver kms_driver = { +int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); + +const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + /* KMS */ + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, 
DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +}; + +static const struct drm_driver amdgpu_kms_driver = { .driver_features = DRIVER_ATOMIC | DRIVER_GEM | @@ -1520,19 +1566,14 @@ static struct drm_driver kms_driver = { .lastclose = amdgpu_driver_lastclose_kms, .irq_handler = amdgpu_irq_handler, .ioctls = amdgpu_ioctls_kms, - .gem_free_object_unlocked = amdgpu_gem_object_free, - .gem_open_object = amdgpu_gem_object_open, - .gem_close_object = amdgpu_gem_object_close, + .num_ioctls = ARRAY_SIZE(amdgpu_ioctls_kms), .dumb_create = amdgpu_mode_dumb_create, .dumb_map_offset = amdgpu_mode_dumb_mmap, .fops = &amdgpu_driver_kms_fops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_export = amdgpu_gem_prime_export, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_vmap = amdgpu_gem_prime_vmap, - .gem_prime_vunmap = amdgpu_gem_prime_vunmap, .gem_prime_mmap = amdgpu_gem_prime_mmap, .name = DRIVER_NAME, @@ -1578,7 +1619,6 @@ static int __init amdgpu_init(void) goto error_fence; DRM_INFO("amdgpu kernel modesetting enabled.\n"); - kms_driver.num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index e2c2eb45a793..0bf7d36c6686 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -207,6 +207,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, int ret; unsigned long tmp; + memset(&mode_cmd, 0, sizeof(mode_cmd)); mode_cmd.width = sizes->surface_width; mode_cmd.height = sizes->surface_height; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fe2d495d08ab..d56f4023ebb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -130,6 +130,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * * @ring: ring the fence is associated with * @f: resulting fence object + * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. @@ -187,6 +188,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, * * @ring: ring the fence is associated with * @s: resulting sequence number + * @timeout: the timeout for waiting in usecs * * Emits a fence command on the requested ring (all asics). * Used For polling fence. @@ -294,7 +296,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) /** * amdgpu_fence_fallback - fallback for hardware interrupts * - * @work: delayed work item + * @t: timer context used to obtain the pointer to ring structure * * Checks for fence activity. 
*/ @@ -310,7 +312,6 @@ static void amdgpu_fence_fallback(struct timer_list *t) /** * amdgpu_fence_wait_empty - wait for all fences to signal * - * @adev: amdgpu device pointer * @ring: ring index the fence is associated with * * Wait for all fences on the requested ring to signal (all asics). @@ -639,7 +640,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) /** * amdgpu_fence_enable_signaling - enable signalling on fence - * @fence: fence + * @f: fence * * This function is called with fence_queue lock held, and adds a callback * to fence_queue that checks if this fence is signaled, and if so it @@ -675,7 +676,7 @@ static void amdgpu_fence_free(struct rcu_head *rcu) /** * amdgpu_fence_release - callback that fence can be freed * - * @fence: fence + * @f: fence * * This function is called when the reference count becomes zero. * It just RCU schedules freeing up the fence. @@ -740,7 +741,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) return 0; } -/** +/* * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover * * Manually trigger a gpu reset at the next fence wait. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c new file mode 100644 index 000000000000..7c6e02e35573 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -0,0 +1,141 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include <linux/debugfs.h> +#include <linux/firmware.h> +#include <linux/dma-mapping.h> + +#include "amdgpu.h" +#include "amdgpu_fw_attestation.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" + +#define FW_ATTESTATION_DB_COOKIE 0x143b6a37 +#define FW_ATTESTATION_RECORD_VALID 1 +#define FW_ATTESTATION_MAX_SIZE 4096 + +typedef struct FW_ATT_DB_HEADER +{ + uint32_t AttDbVersion; /* version of the fwar feature */ + uint32_t AttDbCookie; /* cookie as an extra check for corrupt data */ +} FW_ATT_DB_HEADER; + +typedef struct FW_ATT_RECORD +{ + uint16_t AttFwIdV1; /* Legacy FW Type field */ + uint16_t AttFwIdV2; /* V2 FW ID field */ + uint32_t AttFWVersion; /* FW Version */ + uint16_t AttFWActiveFunctionID; /* The VF ID (only in VF Attestation Table) */ + uint16_t AttSource; /* FW source indicator */ + uint16_t RecordValid; /* Indicates whether the record is a valid entry */ + uint8_t AttFwTaId; /* Ta ID (only in TA Attestation Table) */ + uint8_t Reserved; +} FW_ATT_RECORD; + +static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, + char __user *buf, + size_t size, + loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + uint64_t records_addr = 0; + uint64_t vram_pos = 0; + FW_ATT_DB_HEADER fw_att_hdr = {0}; + FW_ATT_RECORD fw_att_record = {0}; + + if (size < sizeof(FW_ATT_RECORD)) { + DRM_WARN("FW attestation input buffer not enough memory"); + return -EINVAL; + } + + if ((*pos + sizeof(FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) { + DRM_WARN("FW attestation out of bounds"); + return 0; + } + + if (psp_get_fw_attestation_records_addr(&adev->psp, &records_addr)) { + DRM_WARN("Failed to get FW attestation record address"); + return -EINVAL; + } + + vram_pos = records_addr - adev->gmc.vram_start; + + if (*pos == 0) { + amdgpu_device_vram_access(adev, + vram_pos, + (uint32_t*)&fw_att_hdr, + sizeof(FW_ATT_DB_HEADER), + false); + + if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) { + DRM_WARN("Invalid FW attestation cookie"); + return -EINVAL; + } + + DRM_INFO("FW attestation version = 0x%X", fw_att_hdr.AttDbVersion); + } + + amdgpu_device_vram_access(adev, + vram_pos + sizeof(FW_ATT_DB_HEADER) + *pos, + (uint32_t*)&fw_att_record, + sizeof(FW_ATT_RECORD), + false); + + if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID) + return 0; + + if (copy_to_user(buf, (void*)&fw_att_record, sizeof(FW_ATT_RECORD))) + return -EINVAL; + + *pos += sizeof(FW_ATT_RECORD); + + return sizeof(FW_ATT_RECORD); +} + +static const struct file_operations amdgpu_fw_attestation_debugfs_ops = { + .owner = THIS_MODULE, + .read = amdgpu_fw_attestation_debugfs_read, + .write = NULL, + .llseek = default_llseek +}; + +static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev) +{ + if (adev->asic_type >= CHIP_SIENNA_CICHLID) + return 1; + + return 0; +} + +void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev) +{ + if (!amdgpu_is_fw_attestation_supported(adev)) + return; + + debugfs_create_file("amdgpu_fw_attestation", + S_IRUSR, + adev_to_drm(adev)->primary->debugfs_root, + adev, + &amdgpu_fw_attestation_debugfs_ops); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h new file mode 100644 index 000000000000..90af4fe58c99 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#ifndef _AMDGPU_FW_ATTESTATION_H +#define _AMDGPU_FW_ATTESTATION_H + +#include "amdgpu.h" + +void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index e01e681d2a60..0db933026722 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -75,9 +75,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) if (adev->dummy_page_addr) return 0; - adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0, + adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) { + if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); adev->dummy_page_addr = 0; return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index e8c76bd8c501..d0a1fee1f5f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -33,12 +33,16 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_debugfs.h> +#include <drm/drm_gem_ttm_helper.h> #include "amdgpu.h" #include "amdgpu_display.h" +#include "amdgpu_dma_buf.h" #include "amdgpu_xgmi.h" -void amdgpu_gem_object_free(struct drm_gem_object *gobj) +static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; + +static void amdgpu_gem_object_free(struct drm_gem_object *gobj) { struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); @@ -73,6 +77,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, return r; *obj = &bo->tbo.base; + (*obj)->funcs = &amdgpu_gem_object_funcs; return 0; } @@ -105,8 +110,8 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev) * Call from drm_gem_handle_create which appear in both new and open ioctl * case. 
*/ -int amdgpu_gem_object_open(struct drm_gem_object *obj, - struct drm_file *file_priv) +static int amdgpu_gem_object_open(struct drm_gem_object *obj, + struct drm_file *file_priv) { struct amdgpu_bo *abo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -138,8 +143,8 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, return 0; } -void amdgpu_gem_object_close(struct drm_gem_object *obj, - struct drm_file *file_priv) +static void amdgpu_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -197,6 +202,15 @@ out_unlock: ttm_eu_backoff_reservation(&ticket, &list); } +static const struct drm_gem_object_funcs amdgpu_gem_object_funcs = { + .free = amdgpu_gem_object_free, + .open = amdgpu_gem_object_open, + .close = amdgpu_gem_object_close, + .export = amdgpu_gem_prime_export, + .vmap = drm_gem_ttm_vmap, + .vunmap = drm_gem_ttm_vunmap, +}; + /* * GEM ioctls. */ @@ -841,67 +855,6 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, } #if defined(CONFIG_DEBUG_FS) - -#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag) \ - if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \ - seq_printf((m), " " #flag); \ - } - -static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) -{ - struct drm_gem_object *gobj = ptr; - struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); - struct seq_file *m = data; - - struct dma_buf_attachment *attachment; - struct dma_buf *dma_buf; - unsigned domain; - const char *placement; - unsigned pin_count; - - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - switch (domain) { - case AMDGPU_GEM_DOMAIN_VRAM: - placement = "VRAM"; - break; - case AMDGPU_GEM_DOMAIN_GTT: - placement = " GTT"; - break; - case AMDGPU_GEM_DOMAIN_CPU: - default: - placement = " CPU"; - break; - } - seq_printf(m, "\t0x%08x: %12ld byte %s", - id, amdgpu_bo_size(bo), placement); - - pin_count = READ_ONCE(bo->pin_count); - if (pin_count) - seq_printf(m, " pin count %d", pin_count); - - dma_buf = READ_ONCE(bo->tbo.base.dma_buf); - attachment = READ_ONCE(bo->tbo.base.import_attach); - - if (attachment) - seq_printf(m, " imported from %p%s", dma_buf, - attachment->peer2peer ? 
" P2P" : ""); - else if (dma_buf) - seq_printf(m, " exported as %p", dma_buf); - - amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); - amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS); - amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC); - amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED); - amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW); - amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS); - amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID); - amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC); - - seq_printf(m, "\n"); - - return 0; -} - static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; @@ -915,6 +868,8 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) list_for_each_entry(file, &dev->filelist, lhead) { struct task_struct *task; + struct drm_gem_object *gobj; + int id; /* * Although we have a valid reference on file->pid, that does @@ -929,7 +884,11 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) rcu_read_unlock(); spin_lock(&file->table_lock); - idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m); + idr_for_each_entry(&file->object_idr, gobj, id) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); + + amdgpu_bo_print_info(id, bo, m); + } spin_unlock(&file->table_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index e0f025dd1b14..637bf51dbf06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -33,11 +33,6 @@ #define AMDGPU_GEM_DOMAIN_MAX 0x3 #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) -void amdgpu_gem_object_free(struct drm_gem_object *obj); -int amdgpu_gem_object_open(struct drm_gem_object *obj, - struct drm_file *file_priv); -void amdgpu_gem_object_close(struct drm_gem_object *obj, - struct drm_file *file_priv); unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8c9bacfdbc30..cd2c676a2797 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -193,10 +193,14 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) } bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, - int queue) + int pipe, int queue) { - /* Policy: make queue 0 of each pipe as high priority compute queue */ - return (queue == 0); + bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); + int cond; + /* Policy: alternate between normal and high priority */ + cond = multipipe_policy ? 
pipe : queue; + + return ((cond % 2) != 0); } @@ -804,3 +808,34 @@ failed_undo: failed_kiq_write: dev_err(adev->dev, "failed to write reg:%x\n", reg); } + +int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) +{ + if (amdgpu_num_kcq == -1) { + return 8; + } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) { + dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n"); + return 8; + } + return amdgpu_num_kcq; +} + +/* amdgpu_gfx_state_change_set - Handle gfx power state change set + * @adev: amdgpu_device pointer + * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2 -sGpuChangeState_D3Entry) + * + */ + +void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state) +{ + if (is_support_sw_smu(adev)) { + smu_gfx_state_change_set(&adev->smu, state); + } else { + mutex_lock(&adev->pm.mutex); + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->gfx_state_change_set) + ((adev)->powerplay.pp_funcs->gfx_state_change_set( + (adev)->powerplay.pp_handle, state)); + mutex_unlock(&adev->pm.mutex); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 258498cbf1eb..6b5a8f4642cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -47,6 +47,12 @@ enum gfx_pipe_priority { AMDGPU_GFX_PIPE_PRIO_MAX }; +/* Argument for PPSMC_MSG_GpuChangeState */ +enum gfx_change_state { + sGpuChangeState_D0Entry = 1, + sGpuChangeState_D3Entry, +}; + #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 @@ -218,6 +224,7 @@ struct amdgpu_gfx_funcs { void (*reset_ras_error_count) (struct amdgpu_device *adev); void (*init_spm_golden)(struct amdgpu_device *adev); void (*query_ras_error_status) (struct amdgpu_device *adev); + void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable); }; struct sq_work { @@ -373,7 +380,7 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, - int queue); + int pipe, int queue); int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, @@ -392,4 +399,6 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); +void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 3e4892b7b7d3..02af47ddddbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -45,12 +45,10 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, uint64_t *addr, uint64_t *flags) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct ttm_dma_tt *ttm; switch (bo->tbo.mem.mem_type) { case TTM_PL_TT: - ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); - *addr = ttm->dma_address[0]; + *addr = bo->tbo.ttm->dma_address[0]; break; case TTM_PL_VRAM: *addr = amdgpu_bo_gpu_offset(bo); @@ -63,9 +61,8 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int 
level, amdgpu_gmc_get_vm_pde(adev, level, addr, flags); } -/** +/* * amdgpu_gmc_pd_addr - return the address of the root directory - * */ uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo) { @@ -114,7 +111,7 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, /** * amdgpu_gmc_agp_addr - return the address in the AGP address space * - * @tbo: TTM BO which needs the address, must be in GTT domain + * @bo: TTM BO which needs the address, must be in GTT domain * * Tries to figure out how to access the BO through the AGP aperture. Returns * AMDGPU_BO_INVALID_OFFSET if that is not possible. @@ -122,16 +119,14 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); - struct ttm_dma_tt *ttm; - if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached) + if (bo->num_pages != 1 || bo->ttm->caching == ttm_cached) return AMDGPU_BO_INVALID_OFFSET; - ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm); - if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size) + if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size) return AMDGPU_BO_INVALID_OFFSET; - return adev->gmc.agp_start + ttm->dma_address[0]; + return adev->gmc.agp_start + bo->ttm->dma_address[0]; } /** @@ -393,6 +388,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + case CHIP_VANGOGH: /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { @@ -425,20 +421,26 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) struct amdgpu_gmc *gmc = &adev->gmc; switch (adev->asic_type) { - case CHIP_RAVEN: - /* Raven currently has issues with noretry - * regardless of what we decide for other - * asics, we should leave raven with - * noretry = 0 until we root cause the - * issues. + case CHIP_VEGA10: + case CHIP_VEGA20: + /* + * noretry = 0 will cause kfd page fault tests fail + * for some ASICs, so set default to 1 for these ASICs. */ if (amdgpu_noretry == -1) - gmc->noretry = 0; + gmc->noretry = 1; else gmc->noretry = amdgpu_noretry; break; + case CHIP_RAVEN: default: - /* default this to 0 for now, but we may want + /* Raven currently has issues with noretry + * regardless of what we decide for other + * asics, we should leave raven with + * noretry = 0 until we root cause the + * issues. + * + * default this to 0 for now, but we may want * to change this in the future for certain * GPUs as it can increase performance in * certain cases. 
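The compute-queue priority rework in the amdgpu_gfx.c hunk above replaces the old "queue 0 of each pipe is high priority" rule with an alternating scheme: on multipipe-capable ASICs the pipe index decides, otherwise the queue index does, and odd indices become high priority. A minimal stand-alone sketch of that mapping (not part of the patch; the 4-pipe, 2-queue layout is purely illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the policy in amdgpu_gfx_is_high_priority_compute_queue():
 * alternate by pipe on multipipe-capable parts, by queue otherwise. */
static bool is_high_priority(bool multipipe_policy, int pipe, int queue)
{
	int cond = multipipe_policy ? pipe : queue;

	return (cond % 2) != 0;
}

int main(void)
{
	int pipe, queue;

	for (pipe = 0; pipe < 4; pipe++)		/* illustrative pipe count */
		for (queue = 0; queue < 2; queue++)	/* illustrative queue count */
			printf("pipe %d queue %d -> %s priority\n", pipe, queue,
			       is_high_priority(true, pipe, queue) ? "high" : "normal");
	return 0;
}

Under the multipipe policy the odd pipes (1 and 3 here) come out high priority, so high-priority compute work is spread across pipes rather than always landing on queue 0 of every pipe.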
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 731f3aa2e6ba..8980329cded0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -136,7 +136,7 @@ void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) ttm_resource_manager_set_used(man, false); - ret = ttm_resource_manager_force_list_clean(&adev->mman.bdev, man); + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); if (ret) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2f53fa0ae9a6..024d0a563a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -52,8 +52,10 @@ /** * amdgpu_ib_get - request an IB (Indirect Buffer) * - * @ring: ring index the IB is associated with + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm pointer * @size: requested IB size + * @pool_type: IB pool type (delayed, immediate, direct) * @ib: IB object returned * * Request an IB (all asics). IBs are allocated using the @@ -101,9 +103,10 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, /** * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring * - * @adev: amdgpu_device pointer + * @ring: ring index the IB is associated with * @num_ibs: number of IBs to schedule * @ibs: IB objects to schedule + * @job: job to schedule * @f: fence created during this submission * * Schedule an IB on the associated ring (all asics). diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 6e9a9e5dbea0..94b069630db3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -208,7 +208,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) return amdgpu_sync_fence(sync, ring->vmid_wait); - fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); + fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); if (!fences) return -ENOMEM; @@ -259,6 +259,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, * @sync: sync object where we add dependencies * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID + * @id: resulting VMID * * Try to assign a reserved VMID. */ @@ -514,6 +515,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, * amdgpu_vmid_reset - reset VMID to zero * * @adev: amdgpu device structure + * @vmhub: vmhub type * @vmid: vmid number to use * * Reset saved GDW, GWS and OA to force switch on next flush. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 111a301ce878..dcd9b4a8e20b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -132,6 +132,35 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) } /** + * amdgpu_ih_ring_write - write IV to the ring buffer + * + * @ih: ih ring to write to + * @iv: the iv to write + * @num_dw: size of the iv in dw + * + * Writes an IV to the ring buffer using the CPU and increment the wptr. + * Used for testing and delegating IVs to a software ring. 
+ */ +void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, + unsigned int num_dw) +{ + uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2; + unsigned int i; + + for (i = 0; i < num_dw; ++i) + ih->ring[wptr++] = cpu_to_le32(iv[i]); + + wptr <<= 2; + wptr &= ih->ptr_mask; + + /* Only commit the new wptr if we don't overflow */ + if (wptr != READ_ONCE(ih->rptr)) { + wmb(); + WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr)); + } +} + +/** * amdgpu_ih_process - interrupt handler * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 4e0bb645176d..3c9cfe7eecff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,8 @@ struct amdgpu_ih_funcs { int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, unsigned ring_size, bool use_bus_addr); void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, + unsigned int num_dw); int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 300ac73b4738..bea57e8e793f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -207,6 +207,21 @@ static void amdgpu_irq_handle_ih2(struct work_struct *work) } /** + * amdgpu_irq_handle_ih_soft - kick off processing for ih_soft + * + * @work: work structure in struct amdgpu_irq + * + * Kick off processing of the IH soft ring. + */ +static void amdgpu_irq_handle_ih_soft(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + irq.ih_soft_work); + + amdgpu_ih_process(adev, &adev->irq.ih_soft); +} + +/** * amdgpu_msi_ok - check whether MSI functionality is enabled * * @adev: amdgpu device pointer (unused) @@ -281,6 +296,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1); INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); + INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft); adev->irq.installed = true; /* Use vector 0 for MSI-X */ @@ -413,6 +429,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, bool handled = false; int r; + entry.ih = ih; entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; amdgpu_ih_decode_iv(adev, &entry); @@ -451,6 +468,24 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, } /** + * amdgpu_irq_delegate - delegate IV to soft IH ring + * + * @adev: amdgpu device pointer + * @entry: IV entry + * @num_dw: size of IV + * + * Delegate the IV to the soft IH ring and schedule processing of it. Used + * if the hardware delegation to IH1 or IH2 doesn't work for some reason. 
+ */ +void amdgpu_irq_delegate(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + unsigned int num_dw) +{ + amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw); + schedule_work(&adev->irq.ih_soft_work); +} + +/** * amdgpu_irq_update - update hardware interrupt state * * @adev: amdgpu device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index c718e94a55c9..ac527e5deae6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -44,6 +44,7 @@ enum amdgpu_interrupt_state { }; struct amdgpu_iv_entry { + struct amdgpu_ih_ring *ih; unsigned client_id; unsigned src_id; unsigned ring_id; @@ -88,9 +89,9 @@ struct amdgpu_irq { bool msi_enabled; /* msi enabled */ /* interrupt rings */ - struct amdgpu_ih_ring ih, ih1, ih2; + struct amdgpu_ih_ring ih, ih1, ih2, ih_soft; const struct amdgpu_ih_funcs *ih_funcs; - struct work_struct ih1_work, ih2_work; + struct work_struct ih1_work, ih2_work, ih_soft_work; struct amdgpu_irq_src self_irq; /* gen irq stuff */ @@ -109,6 +110,9 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, struct amdgpu_irq_src *source); void amdgpu_irq_dispatch(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +void amdgpu_irq_delegate(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + unsigned int num_dw); int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type); int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index efda38349a03..fc12fc72366f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -29,7 +29,6 @@ #include "amdgpu.h" #include <drm/drm_debugfs.h> #include <drm/amdgpu_drm.h> -#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" #include "atom.h" @@ -163,10 +162,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) } else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm != 0)) { switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: -#endif case CHIP_VEGA20: case CHIP_ARCTURUS: case CHIP_SIENNA_CICHLID: @@ -181,7 +176,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) adev->runpm = true; break; default: - /* enable runpm on VI+ */ + /* enable runpm on CI+ */ adev->runpm = true; break; } @@ -325,6 +320,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->dm.dmcub_fw_version; fw_info->feature = 0; break; + case AMDGPU_INFO_FW_TOC: + fw_info->ver = adev->psp.toc_fw_version; + fw_info->feature = adev->psp.toc_feature_version; + break; default: return -EINVAL; } @@ -471,7 +470,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, /** * amdgpu_info_ioctl - answer a device specific request. * - * @adev: amdgpu device pointer + * @dev: drm device pointer * @data: request object * @filp: drm filp * @@ -480,7 +479,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, * etc. (all asics). * Returns 0 on success, -EINVAL on failure. 
*/ -static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = drm_to_adev(dev); struct drm_amdgpu_info *info = data; @@ -717,38 +716,42 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return n ? -EFAULT : 0; } case AMDGPU_INFO_DEV_INFO: { - struct drm_amdgpu_info_device dev_info; + struct drm_amdgpu_info_device *dev_info; uint64_t vm_size; + int ret; - memset(&dev_info, 0, sizeof(dev_info)); - dev_info.device_id = dev->pdev->device; - dev_info.chip_rev = adev->rev_id; - dev_info.external_rev = adev->external_rev_id; - dev_info.pci_rev = dev->pdev->revision; - dev_info.family = adev->family; - dev_info.num_shader_engines = adev->gfx.config.max_shader_engines; - dev_info.num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; + dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); + if (!dev_info) + return -ENOMEM; + + dev_info->device_id = dev->pdev->device; + dev_info->chip_rev = adev->rev_id; + dev_info->external_rev = adev->external_rev_id; + dev_info->pci_rev = dev->pdev->revision; + dev_info->family = adev->family; + dev_info->num_shader_engines = adev->gfx.config.max_shader_engines; + dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; /* return all clocks in KHz */ - dev_info.gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10; + dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10; if (adev->pm.dpm_enabled) { - dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; - dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; + dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; + dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; } else { - dev_info.max_engine_clock = adev->clock.default_sclk * 10; - dev_info.max_memory_clock = adev->clock.default_mclk * 10; + dev_info->max_engine_clock = adev->clock.default_sclk * 10; + dev_info->max_memory_clock = adev->clock.default_mclk * 10; } - dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; - dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * + dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; + dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se * adev->gfx.config.max_shader_engines; - dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; - dev_info._pad = 0; - dev_info.ids_flags = 0; + dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; + dev_info->_pad = 0; + dev_info->ids_flags = 0; if (adev->flags & AMD_IS_APU) - dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) - dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; if (amdgpu_is_tmz(adev)) - dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ; + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_SIZE; @@ -758,45 +761,47 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file adev->vce.fw_version < AMDGPU_VCE_FW_53_45) vm_size = min(vm_size, 1ULL << 40); - dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; - dev_info.virtual_address_max = + dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; + dev_info->virtual_address_max = min(vm_size, AMDGPU_GMC_HOLE_START); if (vm_size > 
AMDGPU_GMC_HOLE_START) { - dev_info.high_va_offset = AMDGPU_GMC_HOLE_END; - dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size; + dev_info->high_va_offset = AMDGPU_GMC_HOLE_END; + dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size; } - dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); - dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; - dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; - dev_info.cu_active_number = adev->gfx.cu_info.number; - dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; - dev_info.ce_ram_size = adev->gfx.ce_ram_size; - memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], + dev_info->virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); + dev_info->pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; + dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE; + dev_info->cu_active_number = adev->gfx.cu_info.number; + dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; + dev_info->ce_ram_size = adev->gfx.ce_ram_size; + memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], sizeof(adev->gfx.cu_info.ao_cu_bitmap)); - memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], + memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], sizeof(adev->gfx.cu_info.bitmap)); - dev_info.vram_type = adev->gmc.vram_type; - dev_info.vram_bit_width = adev->gmc.vram_width; - dev_info.vce_harvest_config = adev->vce.harvest_config; - dev_info.gc_double_offchip_lds_buf = + dev_info->vram_type = adev->gmc.vram_type; + dev_info->vram_bit_width = adev->gmc.vram_width; + dev_info->vce_harvest_config = adev->vce.harvest_config; + dev_info->gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; - dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; - dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; - dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; - dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; - dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; - dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; - dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; + dev_info->wave_front_size = adev->gfx.cu_info.wave_front_size; + dev_info->num_shader_visible_vgprs = adev->gfx.config.max_gprs; + dev_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + dev_info->num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; + dev_info->gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; + dev_info->gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; + dev_info->max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; if (adev->family >= AMDGPU_FAMILY_NV) - dev_info.pa_sc_tile_steering_override = + dev_info->pa_sc_tile_steering_override = adev->gfx.config.pa_sc_tile_steering_override; - dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; - return copy_to_user(out, &dev_info, - min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; + ret = copy_to_user(out, dev_info, + min((size_t)size, sizeof(*dev_info))) ? 
-EFAULT : 0; + kfree(dev_info); + return ret; } case AMDGPU_INFO_VCE_CLOCK_TABLE: { unsigned i; @@ -1243,27 +1248,6 @@ void amdgpu_disable_vblank_kms(struct drm_crtc *crtc) amdgpu_irq_put(adev, &adev->crtc_irq, idx); } -const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { - DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), - DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - /* KMS */ - DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_CS, amdgpu_cs_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_INFO, amdgpu_info_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_CS, amdgpu_cs_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_WAIT_FENCES, amdgpu_cs_wait_fences_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_GEM_METADATA, amdgpu_gem_metadata_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW) -}; -const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); - /* * Debugfs info */ @@ -1466,6 +1450,13 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "DMCUB feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* TOC */ + query_fw.fw_type = AMDGPU_INFO_FW_TOC; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "TOC feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index a04decb934b0..319cb19e1b99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -302,6 +302,9 @@ struct amdgpu_display_funcs { struct amdgpu_framebuffer { struct drm_framebuffer base; + uint64_t tiling_flags; + bool tmz_surface; + /* caching for later use */ uint64_t address; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index edaac242ff85..e62cc0e1a5ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -53,6 +53,8 @@ struct amdgpu_nbio_funcs { u32 (*get_hdp_flush_done_offset)(struct amdgpu_device *adev); u32 (*get_pcie_index_offset)(struct amdgpu_device *adev); u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_port_index_offset)(struct amdgpu_device *adev); + u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev); u32 (*get_rev_id)(struct amdgpu_device *adev); void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -85,6 +87,8 @@ struct amdgpu_nbio_funcs { void (*query_ras_error_count)(struct amdgpu_device 
*adev, void *ras_error_status); int (*ras_late_init)(struct amdgpu_device *adev); + void (*enable_aspm)(struct amdgpu_device *adev, + bool enable); }; struct amdgpu_nbio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index ac043baac05d..25ec4d57333f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -78,7 +78,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); - if (bo->pin_count > 0) + if (bo->tbo.pin_count > 0) amdgpu_bo_subtract_pin_size(bo); amdgpu_bo_kunmap(bo); @@ -137,7 +137,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].fpfn = 0; places[c].lpfn = 0; places[c].mem_type = TTM_PL_VRAM; - places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED; + places[c].flags = 0; if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) places[c].lpfn = visible_pfn; @@ -154,11 +154,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].lpfn = 0; places[c].mem_type = TTM_PL_TT; places[c].flags = 0; - if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) - places[c].flags |= TTM_PL_FLAG_WC | - TTM_PL_FLAG_UNCACHED; - else - places[c].flags |= TTM_PL_FLAG_CACHED; c++; } @@ -167,11 +162,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].lpfn = 0; places[c].mem_type = TTM_PL_SYSTEM; places[c].flags = 0; - if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) - places[c].flags |= TTM_PL_FLAG_WC | - TTM_PL_FLAG_UNCACHED; - else - places[c].flags |= TTM_PL_FLAG_CACHED; c++; } @@ -179,7 +169,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].fpfn = 0; places[c].lpfn = 0; places[c].mem_type = AMDGPU_PL_GDS; - places[c].flags = TTM_PL_FLAG_UNCACHED; + places[c].flags = 0; c++; } @@ -187,7 +177,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].fpfn = 0; places[c].lpfn = 0; places[c].mem_type = AMDGPU_PL_GWS; - places[c].flags = TTM_PL_FLAG_UNCACHED; + places[c].flags = 0; c++; } @@ -195,7 +185,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].fpfn = 0; places[c].lpfn = 0; places[c].mem_type = AMDGPU_PL_OA; - places[c].flags = TTM_PL_FLAG_UNCACHED; + places[c].flags = 0; c++; } @@ -203,7 +193,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].fpfn = 0; places[c].lpfn = 0; places[c].mem_type = TTM_PL_SYSTEM; - places[c].flags = TTM_PL_MASK_CACHING; + places[c].flags = 0; c++; } @@ -526,9 +516,10 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, struct ttm_operation_ctx ctx = { .interruptible = (bp->type != ttm_bo_type_kernel), .no_wait_gpu = bp->no_wait_gpu, - .resv = bp->resv, - .flags = bp->type != ttm_bo_type_kernel ? 
- TTM_OPT_FLAG_ALLOW_RES_EVICT : 0 + /* We opt to avoid OOM on system pages allocations */ + .gfp_retry_mayfail = true, + .allow_res_evict = bp->type != ttm_bo_type_kernel, + .resv = bp->resv }; struct amdgpu_bo *bo; unsigned long page_align, size = bp->size; @@ -721,7 +712,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo) uint32_t domain; int r; - if (bo->pin_count) + if (bo->tbo.pin_count) return 0; domain = bo->preferred_domains; @@ -918,13 +909,13 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, */ domain = amdgpu_bo_get_preferred_pin_domain(adev, domain); - if (bo->pin_count) { + if (bo->tbo.pin_count) { uint32_t mem_type = bo->tbo.mem.mem_type; if (!(domain & amdgpu_mem_type_to_domain(mem_type))) return -EINVAL; - bo->pin_count++; + ttm_bo_pin(&bo->tbo); if (max_offset != 0) { u64 domain_start = amdgpu_ttm_domain_start(adev, @@ -955,7 +946,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (!bo->placements[i].lpfn || (lpfn && lpfn < bo->placements[i].lpfn)) bo->placements[i].lpfn = lpfn; - bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -964,7 +954,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, goto error; } - bo->pin_count = 1; + ttm_bo_pin(&bo->tbo); domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); if (domain == AMDGPU_GEM_DOMAIN_VRAM) { @@ -1006,34 +996,16 @@ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain) * Returns: * 0 for success or a negative error code on failure. */ -int amdgpu_bo_unpin(struct amdgpu_bo *bo) +void amdgpu_bo_unpin(struct amdgpu_bo *bo) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct ttm_operation_ctx ctx = { false, false }; - int r, i; - - if (WARN_ON_ONCE(!bo->pin_count)) { - dev_warn(adev->dev, "%p unpin not necessary\n", bo); - return 0; - } - bo->pin_count--; - if (bo->pin_count) - return 0; + ttm_bo_unpin(&bo->tbo); + if (bo->tbo.pin_count) + return; amdgpu_bo_subtract_pin_size(bo); if (bo->tbo.base.import_attach) dma_buf_unpin(bo->tbo.base.import_attach); - - for (i = 0; i < bo->placement.num_placement; i++) { - bo->placements[i].lpfn = 0; - bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; - } - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (unlikely(r)) - dev_err(adev->dev, "%p validate failed for unpin\n", bo); - - return r; } /** @@ -1048,6 +1020,8 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) */ int amdgpu_bo_evict_vram(struct amdgpu_device *adev) { + struct ttm_resource_manager *man; + /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ #ifndef CONFIG_HIBERNATION if (adev->flags & AMD_IS_APU) { @@ -1055,7 +1029,9 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev) return 0; } #endif - return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); + + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + return ttm_resource_manager_evict_all(&adev->mman.bdev, man); } static const char *amdgpu_vram_names[] = { @@ -1069,6 +1045,7 @@ static const char *amdgpu_vram_names[] = { "DDR3", "DDR4", "GDDR6", + "DDR5" }; /** @@ -1098,23 +1075,6 @@ int amdgpu_bo_init(struct amdgpu_device *adev) } /** - * amdgpu_bo_late_init - late init - * @adev: amdgpu device object - * - * Calls amdgpu_ttm_late_init() to free resources used earlier during - * initialization. - * - * Returns: - * 0 for success or a negative error code on failure. 
- */ -int amdgpu_bo_late_init(struct amdgpu_device *adev) -{ - amdgpu_ttm_late_init(adev); - - return 0; -} - -/** * amdgpu_bo_fini - tear down memory manager * @adev: amdgpu device object * @@ -1360,19 +1320,14 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) * Returns: * 0 for success or a negative error code on failure. */ -int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) +vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_bo *abo; + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); unsigned long offset, size; int r; - if (!amdgpu_bo_is_amdgpu_bo(bo)) - return 0; - - abo = ttm_to_amdgpu_bo(bo); - /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; @@ -1385,8 +1340,8 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) return 0; /* Can't move a pinned BO to visible VRAM */ - if (abo->pin_count > 0) - return -EINVAL; + if (abo->tbo.pin_count > 0) + return VM_FAULT_SIGBUS; /* hurrah the memory is not visible ! */ atomic64_inc(&adev->num_vram_cpu_page_faults); @@ -1398,15 +1353,18 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) abo->placement.busy_placement = &abo->placements[1]; r = ttm_bo_validate(bo, &abo->placement, &ctx); - if (unlikely(r != 0)) - return r; + if (unlikely(r == -EBUSY || r == -ERESTARTSYS)) + return VM_FAULT_NOPAGE; + else if (unlikely(r)) + return VM_FAULT_SIGBUS; offset = bo->mem.start << PAGE_SHIFT; /* this should never happen */ if (bo->mem.mem_type == TTM_PL_VRAM && (offset + size) > adev->gmc.visible_vram_size) - return -EINVAL; + return VM_FAULT_SIGBUS; + ttm_bo_move_to_lru_tail_unlocked(bo); return 0; } @@ -1489,7 +1447,7 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); WARN_ON_ONCE(!dma_resv_is_locked(bo->tbo.base.resv) && - !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel); + !bo->tbo.pin_count && bo->tbo.type != ttm_bo_type_kernel); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); @@ -1533,3 +1491,77 @@ uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, } return domain; } + +#if defined(CONFIG_DEBUG_FS) +#define amdgpu_bo_print_flag(m, bo, flag) \ + do { \ + if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag)) { \ + seq_printf((m), " " #flag); \ + } \ + } while (0) + +/** + * amdgpu_bo_print_info - print BO info in debugfs file + * + * @id: Index or Id of the BO + * @bo: Requested BO for printing info + * @m: debugfs file + * + * Print BO information in debugfs file + * + * Returns: + * Size of the BO in bytes. 
+ */ +u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) +{ + struct dma_buf_attachment *attachment; + struct dma_buf *dma_buf; + unsigned int domain; + const char *placement; + unsigned int pin_count; + u64 size; + + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + switch (domain) { + case AMDGPU_GEM_DOMAIN_VRAM: + placement = "VRAM"; + break; + case AMDGPU_GEM_DOMAIN_GTT: + placement = " GTT"; + break; + case AMDGPU_GEM_DOMAIN_CPU: + default: + placement = " CPU"; + break; + } + + size = amdgpu_bo_size(bo); + seq_printf(m, "\t\t0x%08x: %12lld byte %s", + id, size, placement); + + pin_count = READ_ONCE(bo->tbo.pin_count); + if (pin_count) + seq_printf(m, " pin count %d", pin_count); + + dma_buf = READ_ONCE(bo->tbo.base.dma_buf); + attachment = READ_ONCE(bo->tbo.base.import_attach); + + if (attachment) + seq_printf(m, " imported from %p", dma_buf); + else if (dma_buf) + seq_printf(m, " exported as %p", dma_buf); + + amdgpu_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED); + amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS); + amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC); + amdgpu_bo_print_flag(m, bo, VRAM_CLEARED); + amdgpu_bo_print_flag(m, bo, SHADOW); + amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); + amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); + amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); + + seq_puts(m, "\n"); + + return size; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 5ddb6cf96030..79120ec41396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -89,7 +89,6 @@ struct amdgpu_bo { struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; u64 flags; - unsigned pin_count; u64 tiling_flags; u64 metadata_flags; void *metadata; @@ -101,7 +100,6 @@ struct amdgpu_bo { struct amdgpu_bo *parent; struct amdgpu_bo *shadow; - struct ttm_bo_kmap_obj dma_buf_vmap; struct amdgpu_mn *mn; @@ -267,10 +265,9 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo); int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain); int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 min_offset, u64 max_offset); -int amdgpu_bo_unpin(struct amdgpu_bo *bo); +void amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_evict_vram(struct amdgpu_device *adev); int amdgpu_bo_init(struct amdgpu_device *adev); -int amdgpu_bo_late_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma); @@ -285,7 +282,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_resource *new_mem); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); -int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); +vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv, @@ -330,6 +327,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, #if defined(CONFIG_DEBUG_FS) void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, struct seq_file *m); +u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m); #endif int amdgpu_debugfs_sa_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 1f2305b7bd13..f2e20666c9c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -102,11 +102,12 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_ * amdgpu_pll_compute - compute PLL paramaters * * @pll: information about the PLL + * @freq: requested frequency * @dot_clock_p: resulting pixel clock - * fb_div_p: resulting feedback divider - * frac_fb_div_p: fractional part of the feedback divider - * ref_div_p: resulting reference divider - * post_div_p: resulting reference divider + * @fb_div_p: resulting feedback divider + * @frac_fb_div_p: fractional part of the feedback divider + * @ref_div_p: resulting reference divider + * @post_div_p: resulting reference divider * * Try to calculate the PLL parameters to generate the given frequency: * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div) @@ -308,7 +309,6 @@ int amdgpu_pll_get_shared_dp_ppll(struct drm_crtc *crtc) * amdgpu_pll_get_shared_nondp_ppll - return the PPLL used by another non-DP crtc * * @crtc: drm crtc - * @encoder: drm encoder * * Returns the PPLL (Pixel PLL) used by another non-DP crtc/encoder which can * be shared (i.e., same clock). diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 69af462db34d..19c0a3655228 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -19,17 +19,29 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * - * Author: Jonathan Kim <jonathan.kim@amd.com> - * */ #include <linux/perf_event.h> #include <linux/init.h> #include "amdgpu.h" #include "amdgpu_pmu.h" -#include "df_v3_6.h" #define PMU_NAME_SIZE 32 +#define NUM_FORMATS_AMDGPU_PMU 4 +#define NUM_FORMATS_DF_VEGA20 3 +#define NUM_EVENTS_DF_VEGA20 8 +#define NUM_EVENT_TYPES_VEGA20 1 +#define NUM_EVENTS_VEGA20_XGMI 2 +#define NUM_EVENTS_VEGA20_MAX NUM_EVENTS_VEGA20_XGMI +#define NUM_EVENT_TYPES_ARCTURUS 1 +#define NUM_EVENTS_ARCTURUS_XGMI 6 +#define NUM_EVENTS_ARCTURUS_MAX NUM_EVENTS_ARCTURUS_XGMI + +struct amdgpu_pmu_event_attribute { + struct device_attribute attr; + const char *event_str; + unsigned int type; +}; /* record to keep track of pmu entry per pmu type per device */ struct amdgpu_pmu_entry { @@ -37,11 +49,162 @@ struct amdgpu_pmu_entry { struct amdgpu_device *adev; struct pmu pmu; unsigned int pmu_perf_type; + char *pmu_type_name; + char *pmu_file_prefix; + struct attribute_group fmt_attr_group; + struct amdgpu_pmu_event_attribute *fmt_attr; + struct attribute_group evt_attr_group; + struct amdgpu_pmu_event_attribute *evt_attr; }; +static ssize_t amdgpu_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_pmu_event_attribute *amdgpu_pmu_attr; + + amdgpu_pmu_attr = container_of(attr, struct amdgpu_pmu_event_attribute, + attr); + + if (!amdgpu_pmu_attr->type) + return sprintf(buf, "%s\n", amdgpu_pmu_attr->event_str); + + return sprintf(buf, "%s,type=0x%x\n", + amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type); +} + static LIST_HEAD(amdgpu_pmu_list); +struct amdgpu_pmu_attr { + const char *name; + const char *config; +}; + +struct amdgpu_pmu_type { + const unsigned int type; + const unsigned int num_of_type; +}; + +struct amdgpu_pmu_config { + struct amdgpu_pmu_attr *formats; + unsigned int num_formats; + struct amdgpu_pmu_attr *events; + unsigned int num_events; + struct amdgpu_pmu_type *types; + unsigned int num_types; +}; + +/* + * Events fall under two categories: + * - PMU typed + * Events in 
/sys/bus/event_source/devices/amdgpu_<pmu_type>_<dev_num> have + * performance counter operations handled by one IP <pmu_type>. Formats and + * events should be defined by <pmu_type>_<asic_type>_formats and + * <pmu_type>_<asic_type>_events respectively. + * + * - Event config typed + * Events in /sys/bus/event_source/devices/amdgpu_<dev_num> have performance + * counter operations that can be handled by multiple IPs dictated by their + * "type" format field. Formats and events should be defined by + * amdgpu_pmu_formats and <asic_type>_events respectively. Format field + * "type" is generated in amdgpu_pmu_event_show and defined in + * <asic_type>_event_config_types. + */ + +static struct amdgpu_pmu_attr amdgpu_pmu_formats[NUM_FORMATS_AMDGPU_PMU] = { + { .name = "event", .config = "config:0-7" }, + { .name = "instance", .config = "config:8-15" }, + { .name = "umask", .config = "config:16-23"}, + { .name = "type", .config = "config:56-63"} +}; + +/* Vega20 events */ +static struct amdgpu_pmu_attr vega20_events[NUM_EVENTS_VEGA20_MAX] = { + { .name = "xgmi_link0_data_outbound", + .config = "event=0x7,instance=0x46,umask=0x2" }, + { .name = "xgmi_link1_data_outbound", + .config = "event=0x7,instance=0x47,umask=0x2" } +}; + +static struct amdgpu_pmu_type vega20_types[NUM_EVENT_TYPES_VEGA20] = { + { .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI, + .num_of_type = NUM_EVENTS_VEGA20_XGMI } +}; + +static struct amdgpu_pmu_config vega20_config = { + .formats = amdgpu_pmu_formats, + .num_formats = ARRAY_SIZE(amdgpu_pmu_formats), + .events = vega20_events, + .num_events = ARRAY_SIZE(vega20_events), + .types = vega20_types, + .num_types = ARRAY_SIZE(vega20_types) +}; + +/* Vega20 data fabric (DF) events */ +static struct amdgpu_pmu_attr df_vega20_formats[NUM_FORMATS_DF_VEGA20] = { + { .name = "event", .config = "config:0-7" }, + { .name = "instance", .config = "config:8-15" }, + { .name = "umask", .config = "config:16-23"} +}; + +static struct amdgpu_pmu_attr df_vega20_events[NUM_EVENTS_DF_VEGA20] = { + { .name = "cake0_pcsout_txdata", + .config = "event=0x7,instance=0x46,umask=0x2" }, + { .name = "cake1_pcsout_txdata", + .config = "event=0x7,instance=0x47,umask=0x2" }, + { .name = "cake0_pcsout_txmeta", + .config = "event=0x7,instance=0x46,umask=0x4" }, + { .name = "cake1_pcsout_txmeta", + .config = "event=0x7,instance=0x47,umask=0x4" }, + { .name = "cake0_ftiinstat_reqalloc", + .config = "event=0xb,instance=0x46,umask=0x4" }, + { .name = "cake1_ftiinstat_reqalloc", + .config = "event=0xb,instance=0x47,umask=0x4" }, + { .name = "cake0_ftiinstat_rspalloc", + .config = "event=0xb,instance=0x46,umask=0x8" }, + { .name = "cake1_ftiinstat_rspalloc", + .config = "event=0xb,instance=0x47,umask=0x8" } +}; + +static struct amdgpu_pmu_config df_vega20_config = { + .formats = df_vega20_formats, + .num_formats = ARRAY_SIZE(df_vega20_formats), + .events = df_vega20_events, + .num_events = ARRAY_SIZE(df_vega20_events), + .types = NULL, + .num_types = 0 +}; + +/* Arcturus events */ +static struct amdgpu_pmu_attr arcturus_events[NUM_EVENTS_ARCTURUS_MAX] = { + { .name = "xgmi_link0_data_outbound", + .config = "event=0x7,instance=0x4b,umask=0x2" }, + { .name = "xgmi_link1_data_outbound", + .config = "event=0x7,instance=0x4c,umask=0x2" }, + { .name = "xgmi_link2_data_outbound", + .config = "event=0x7,instance=0x4d,umask=0x2" }, + { .name = "xgmi_link3_data_outbound", + .config = "event=0x7,instance=0x4e,umask=0x2" }, + { .name = "xgmi_link4_data_outbound", + .config = "event=0x7,instance=0x4f,umask=0x2" }, + { .name = 
"xgmi_link5_data_outbound", + .config = "event=0x7,instance=0x50,umask=0x2" } +}; + +static struct amdgpu_pmu_type arcturus_types[NUM_EVENT_TYPES_ARCTURUS] = { + { .type = AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI, + .num_of_type = NUM_EVENTS_ARCTURUS_XGMI } +}; + +static struct amdgpu_pmu_config arcturus_config = { + .formats = amdgpu_pmu_formats, + .num_formats = ARRAY_SIZE(amdgpu_pmu_formats), + .events = arcturus_events, + .num_events = ARRAY_SIZE(arcturus_events), + .types = arcturus_types, + .num_types = ARRAY_SIZE(arcturus_types) +}; + /* initialize perf counter */ static int amdgpu_perf_event_init(struct perf_event *event) { @@ -53,6 +216,7 @@ static int amdgpu_perf_event_init(struct perf_event *event) /* update the hw_perf_event struct with config data */ hwc->config = event->attr.config; + hwc->config_base = AMDGPU_PMU_PERF_TYPE_NONE; return 0; } @@ -64,6 +228,7 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) struct amdgpu_pmu_entry *pe = container_of(event->pmu, struct amdgpu_pmu_entry, pmu); + int target_cntr = 0; if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) return; @@ -71,19 +236,27 @@ static void amdgpu_perf_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; - switch (pe->pmu_perf_type) { - case PERF_TYPE_AMDGPU_DF: - if (!(flags & PERF_EF_RELOAD)) - pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 1); + switch (hwc->config_base) { + case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: + case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: + if (!(flags & PERF_EF_RELOAD)) { + target_cntr = pe->adev->df.funcs->pmc_start(pe->adev, + hwc->config, 0 /* unused */, + 1 /* add counter */); + if (target_cntr < 0) + break; + + hwc->idx = target_cntr; + } - pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0); + pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, + hwc->idx, 0); break; default: break; } perf_event_update_userpage(event); - } /* read perf counter */ @@ -93,16 +266,16 @@ static void amdgpu_perf_read(struct perf_event *event) struct amdgpu_pmu_entry *pe = container_of(event->pmu, struct amdgpu_pmu_entry, pmu); - u64 count, prev; do { prev = local64_read(&hwc->prev_count); - switch (pe->pmu_perf_type) { - case PERF_TYPE_AMDGPU_DF: - pe->adev->df.funcs->pmc_get_count(pe->adev, hwc->config, - &count); + switch (hwc->config_base) { + case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: + case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: + pe->adev->df.funcs->pmc_get_count(pe->adev, + hwc->config, hwc->idx, &count); break; default: count = 0; @@ -124,9 +297,11 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; - switch (pe->pmu_perf_type) { - case PERF_TYPE_AMDGPU_DF: - pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0); + switch (hwc->config_base) { + case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: + case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx, + 0); break; default: break; @@ -142,22 +317,39 @@ static void amdgpu_perf_stop(struct perf_event *event, int flags) hwc->state |= PERF_HES_UPTODATE; } -/* add perf counter */ +/* add perf counter */ static int amdgpu_perf_add(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - int retval; - + int retval = 0, target_cntr; struct amdgpu_pmu_entry *pe = container_of(event->pmu, struct amdgpu_pmu_entry, pmu); + switch (pe->pmu_perf_type) { + case AMDGPU_PMU_PERF_TYPE_DF: + hwc->config_base = AMDGPU_PMU_EVENT_CONFIG_TYPE_DF; + break; + case AMDGPU_PMU_PERF_TYPE_ALL: 
+ hwc->config_base = (hwc->config >> + AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT) & + AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK; + break; + } + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - switch (pe->pmu_perf_type) { - case PERF_TYPE_AMDGPU_DF: - retval = pe->adev->df.funcs->pmc_start(pe->adev, - hwc->config, 1); + switch (hwc->config_base) { + case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: + case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: + target_cntr = pe->adev->df.funcs->pmc_start(pe->adev, + hwc->config, 0 /* unused */, + 1 /* add counter */); + if (target_cntr < 0) + retval = target_cntr; + else + hwc->idx = target_cntr; + break; default: return 0; @@ -170,7 +362,6 @@ static int amdgpu_perf_add(struct perf_event *event, int flags) amdgpu_perf_start(event, PERF_EF_RELOAD); return retval; - } /* delete perf counter */ @@ -183,9 +374,11 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) amdgpu_perf_stop(event, PERF_EF_UPDATE); - switch (pe->pmu_perf_type) { - case PERF_TYPE_AMDGPU_DF: - pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1); + switch (hwc->config_base) { + case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: + case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: + pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc->idx, + 1); break; default: break; @@ -194,25 +387,92 @@ static void amdgpu_perf_del(struct perf_event *event, int flags) perf_event_update_userpage(event); } -/* vega20 pmus */ +static void amdgpu_pmu_create_event_attrs_by_type( + struct attribute_group *attr_group, + struct amdgpu_pmu_event_attribute *pmu_attr, + struct amdgpu_pmu_attr events[], + int s_offset, + int e_offset, + unsigned int type) +{ + int i; + + pmu_attr += s_offset; + + for (i = s_offset; i < e_offset; i++) { + attr_group->attrs[i] = &pmu_attr->attr.attr; + sysfs_attr_init(&pmu_attr->attr.attr); + pmu_attr->attr.attr.name = events[i].name; + pmu_attr->attr.attr.mode = 0444; + pmu_attr->attr.show = amdgpu_pmu_event_show; + pmu_attr->event_str = events[i].config; + pmu_attr->type = type; + pmu_attr++; + } +} -/* init pmu tracking per pmu type */ -static int init_pmu_by_type(struct amdgpu_device *adev, - const struct attribute_group *attr_groups[], - char *pmu_type_name, char *pmu_file_prefix, - unsigned int pmu_perf_type, - unsigned int num_counters) +static void amdgpu_pmu_create_attrs(struct attribute_group *attr_group, + struct amdgpu_pmu_event_attribute *pmu_attr, + struct amdgpu_pmu_attr events[], + int num_events) { - char pmu_name[PMU_NAME_SIZE]; - struct amdgpu_pmu_entry *pmu_entry; - int ret = 0; + amdgpu_pmu_create_event_attrs_by_type(attr_group, pmu_attr, events, 0, + num_events, AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE); +} - pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL); - if (!pmu_entry) +static int amdgpu_pmu_alloc_pmu_attrs( + struct attribute_group *fmt_attr_group, + struct amdgpu_pmu_event_attribute **fmt_attr, + struct attribute_group *evt_attr_group, + struct amdgpu_pmu_event_attribute **evt_attr, + struct amdgpu_pmu_config *config) +{ + *fmt_attr = kcalloc(config->num_formats, sizeof(**fmt_attr), + GFP_KERNEL); + + if (!(*fmt_attr)) return -ENOMEM; - pmu_entry->adev = adev; + fmt_attr_group->attrs = kcalloc(config->num_formats + 1, + sizeof(*fmt_attr_group->attrs), GFP_KERNEL); + + if (!fmt_attr_group->attrs) + goto err_fmt_attr_grp; + + *evt_attr = kcalloc(config->num_events, sizeof(**evt_attr), GFP_KERNEL); + + if (!(*evt_attr)) + goto err_evt_attr; + + evt_attr_group->attrs = kcalloc(config->num_events + 1, + sizeof(*evt_attr_group->attrs), GFP_KERNEL); + + if 
(!evt_attr_group->attrs) + goto err_evt_attr_grp; + + return 0; +err_evt_attr_grp: + kfree(*evt_attr); +err_evt_attr: + kfree(fmt_attr_group->attrs); +err_fmt_attr_grp: + kfree(*fmt_attr); + return -ENOMEM; +} + +/* init pmu tracking per pmu type */ +static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry, + struct amdgpu_pmu_config *config) +{ + const struct attribute_group *attr_groups[] = { + &pmu_entry->fmt_attr_group, + &pmu_entry->evt_attr_group, + NULL + }; + char pmu_name[PMU_NAME_SIZE]; + int ret = 0, total_num_events = 0; + pmu_entry->pmu = (struct pmu){ .event_init = amdgpu_perf_event_init, .add = amdgpu_perf_add, @@ -223,59 +483,178 @@ static int init_pmu_by_type(struct amdgpu_device *adev, .task_ctx_nr = perf_invalid_context, }; - pmu_entry->pmu.attr_groups = attr_groups; - pmu_entry->pmu_perf_type = pmu_perf_type; - snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", - pmu_file_prefix, adev_to_drm(adev)->primary->index); + ret = amdgpu_pmu_alloc_pmu_attrs(&pmu_entry->fmt_attr_group, + &pmu_entry->fmt_attr, + &pmu_entry->evt_attr_group, + &pmu_entry->evt_attr, + config); + + if (ret) + goto err_out; + + amdgpu_pmu_create_attrs(&pmu_entry->fmt_attr_group, pmu_entry->fmt_attr, + config->formats, config->num_formats); + + if (pmu_entry->pmu_perf_type == AMDGPU_PMU_PERF_TYPE_ALL) { + int i; + + for (i = 0; i < config->num_types; i++) { + amdgpu_pmu_create_event_attrs_by_type( + &pmu_entry->evt_attr_group, + pmu_entry->evt_attr, + config->events, + total_num_events, + total_num_events + + config->types[i].num_of_type, + config->types[i].type); + total_num_events += config->types[i].num_of_type; + } + } else { + amdgpu_pmu_create_attrs(&pmu_entry->evt_attr_group, + pmu_entry->evt_attr, + config->events, config->num_events); + total_num_events = config->num_events; + } + + pmu_entry->pmu.attr_groups = kmemdup(attr_groups, sizeof(attr_groups), + GFP_KERNEL); + + if (!pmu_entry->pmu.attr_groups) + goto err_attr_group; + + snprintf(pmu_name, PMU_NAME_SIZE, "%s_%d", pmu_entry->pmu_file_prefix, + adev_to_drm(pmu_entry->adev)->primary->index); ret = perf_pmu_register(&pmu_entry->pmu, pmu_name, -1); - if (ret) { - kfree(pmu_entry); - pr_warn("Error initializing AMDGPU %s PMUs.\n", pmu_type_name); - return ret; - } + if (ret) + goto err_register; + + if (pmu_entry->pmu_perf_type != AMDGPU_PMU_PERF_TYPE_ALL) + pr_info("Detected AMDGPU %s Counters. # of Counters = %d.\n", + pmu_entry->pmu_type_name, total_num_events); + else + pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events); - pr_info("Detected AMDGPU %s Counters. 
# of Counters = %d.\n", - pmu_type_name, num_counters); list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list); return 0; +err_register: + kfree(pmu_entry->pmu.attr_groups); +err_attr_group: + kfree(pmu_entry->fmt_attr_group.attrs); + kfree(pmu_entry->fmt_attr); + kfree(pmu_entry->evt_attr_group.attrs); + kfree(pmu_entry->evt_attr); +err_out: + pr_warn("Error initializing AMDGPU %s PMUs.\n", + pmu_entry->pmu_type_name); + return ret; +} + +/* destroy all pmu data associated with target device */ +void amdgpu_pmu_fini(struct amdgpu_device *adev) +{ + struct amdgpu_pmu_entry *pe, *temp; + + list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) { + if (pe->adev != adev) + continue; + list_del(&pe->entry); + perf_pmu_unregister(&pe->pmu); + kfree(pe->pmu.attr_groups); + kfree(pe->fmt_attr_group.attrs); + kfree(pe->fmt_attr); + kfree(pe->evt_attr_group.attrs); + kfree(pe->evt_attr); + kfree(pe); + } +} + +static struct amdgpu_pmu_entry *create_pmu_entry(struct amdgpu_device *adev, + unsigned int pmu_type, + char *pmu_type_name, + char *pmu_file_prefix) +{ + struct amdgpu_pmu_entry *pmu_entry; + + pmu_entry = kzalloc(sizeof(struct amdgpu_pmu_entry), GFP_KERNEL); + + if (!pmu_entry) + return pmu_entry; + + pmu_entry->adev = adev; + pmu_entry->fmt_attr_group.name = "format"; + pmu_entry->fmt_attr_group.attrs = NULL; + pmu_entry->evt_attr_group.name = "events"; + pmu_entry->evt_attr_group.attrs = NULL; + pmu_entry->pmu_perf_type = pmu_type; + pmu_entry->pmu_type_name = pmu_type_name; + pmu_entry->pmu_file_prefix = pmu_file_prefix; + + return pmu_entry; } /* init amdgpu_pmu */ int amdgpu_pmu_init(struct amdgpu_device *adev) { int ret = 0; + struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df; switch (adev->asic_type) { case CHIP_VEGA20: - /* init df */ - ret = init_pmu_by_type(adev, df_v3_6_attr_groups, - "DF", "amdgpu_df", PERF_TYPE_AMDGPU_DF, - DF_V3_6_MAX_COUNTERS); + pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF, + "DF", "amdgpu_df"); - /* other pmu types go here*/ - break; - default: - return 0; - } + if (!pmu_entry_df) + return -ENOMEM; - return 0; -} + ret = init_pmu_entry_by_type_and_add(pmu_entry_df, + &df_vega20_config); + if (ret) { + kfree(pmu_entry_df); + return ret; + } -/* destroy all pmu data associated with target device */ -void amdgpu_pmu_fini(struct amdgpu_device *adev) -{ - struct amdgpu_pmu_entry *pe, *temp; + pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL, + "", "amdgpu"); - list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) { - if (pe->adev == adev) { - list_del(&pe->entry); - perf_pmu_unregister(&pe->pmu); - kfree(pe); + if (!pmu_entry) { + amdgpu_pmu_fini(adev); + return -ENOMEM; + } + + ret = init_pmu_entry_by_type_and_add(pmu_entry, + &vega20_config); + + if (ret) { + kfree(pmu_entry); + amdgpu_pmu_fini(adev); + return ret; + } + + break; + case CHIP_ARCTURUS: + pmu_entry = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_ALL, + "", "amdgpu"); + if (!pmu_entry) + return -ENOMEM; + + ret = init_pmu_entry_by_type_and_add(pmu_entry, + &arcturus_config); + + if (ret) { + kfree(pmu_entry); + return -ENOMEM; } + + break; + + default: + return 0; } + + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h index 7dddb7160a11..6882dc48c5d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.h @@ -19,18 +19,38 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
* - * Author: Jonathan Kim <jonathan.kim@amd.com> - * */ #ifndef _AMDGPU_PMU_H_ #define _AMDGPU_PMU_H_ +/* PMU types. */ enum amdgpu_pmu_perf_type { - PERF_TYPE_AMDGPU_DF = 0, - PERF_TYPE_AMDGPU_MAX + AMDGPU_PMU_PERF_TYPE_NONE = 0, + AMDGPU_PMU_PERF_TYPE_DF, + AMDGPU_PMU_PERF_TYPE_ALL }; +/* + * PMU type AMDGPU_PMU_PERF_TYPE_ALL can hold events of different "type" + * configurations. Event config types are parsed from the 64-bit raw + * config (See EVENT_CONFIG_TYPE_SHIFT and EVENT_CONFIG_TYPE_MASK) and + * are registered into the HW perf events config_base. + * + * PMU types with only a single event configuration type + * (non-AMDGPU_PMU_PERF_TYPE_ALL) have their event config type auto generated + * when the performance counter is added. + */ +enum amdgpu_pmu_event_config_type { + AMDGPU_PMU_EVENT_CONFIG_TYPE_NONE = 0, + AMDGPU_PMU_EVENT_CONFIG_TYPE_DF, + AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI, + AMDGPU_PMU_EVENT_CONFIG_TYPE_MAX +}; + +#define AMDGPU_PMU_EVENT_CONFIG_TYPE_SHIFT 56 +#define AMDGPU_PMU_EVENT_CONFIG_TYPE_MASK 0xff + int amdgpu_pmu_init(struct amdgpu_device *adev); void amdgpu_pmu_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a6dbe4b83533..523d22db094b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -100,6 +100,8 @@ static int psp_early_init(void *handle) case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -288,6 +290,8 @@ psp_cmd_submit_buf(struct psp_context *psp, skip_unsupport = (psp->cmd_buf_mem->resp.status == TEE_ERROR_NOT_SUPPORTED || psp->cmd_buf_mem->resp.status == PSP_ERR_UNKNOWN_COMMAND) && amdgpu_sriov_vf(psp->adev); + memcpy((void*)&cmd->resp, (void*)&psp->cmd_buf_mem->resp, sizeof(struct psp_gfx_resp)); + /* In some cases, psp response status is not 0 even there is no * problem while the command is submitted. Some version of PSP FW * doesn't write 0 to that field. 
@@ -308,9 +312,6 @@ psp_cmd_submit_buf(struct psp_context *psp, } } - /* get xGMI session id from response buffer */ - cmd->resp.session_id = psp->cmd_buf_mem->resp.session_id; - if (ucode) { ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; @@ -509,6 +510,37 @@ static int psp_tmr_terminate(struct psp_context *psp) return 0; } +int psp_get_fw_attestation_records_addr(struct psp_context *psp, + uint64_t *output_ptr) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + if (!output_ptr) + return -EINVAL; + + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->cmd_id = GFX_CMD_ID_GET_FW_ATTESTATION; + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + if (!ret) { + *output_ptr = ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_lo) + + ((uint64_t)cmd->resp.uresp.fwar_db_info.fwar_db_addr_hi << 32); + } + + kfree(cmd); + + return ret; +} + static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t asd_mc, uint32_t size) { @@ -624,14 +656,14 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint64_t ta_shared_mc, uint32_t ta_shared_size) { - cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(ta_bin_mc); - cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); - cmd->cmd.cmd_load_ta.app_len = ta_bin_size; + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(ta_bin_mc); + cmd->cmd.cmd_load_ta.app_len = ta_bin_size; cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(ta_shared_mc); cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(ta_shared_mc); - cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size; + cmd->cmd.cmd_load_ta.cmd_buf_len = ta_shared_size; } static int psp_xgmi_init_shared_buf(struct psp_context *psp) @@ -655,9 +687,9 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) { - cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; - cmd->cmd.cmd_invoke_cmd.session_id = session_id; - cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; } static int psp_ta_invoke(struct psp_context *psp, @@ -806,7 +838,7 @@ int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; @@ -826,7 +858,7 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; @@ -854,7 +886,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) return -EINVAL; - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; 
memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); /* Fill in the shared memory with topology information as input */ @@ -898,7 +930,7 @@ int psp_xgmi_set_topology_info(struct psp_context *psp, if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) return -EINVAL; - xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + xgmi_cmd = (struct ta_xgmi_shared_memory *)psp->xgmi_context.xgmi_shared_buf; memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; @@ -962,7 +994,7 @@ static int psp_ras_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - ras_cmd = (struct ta_ras_shared_memory*)psp->ras.ras_shared_buf; + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; if (!ret) { psp->ras.session_id = cmd->resp.session_id; @@ -1884,7 +1916,7 @@ static int psp_execute_np_fw_load(struct psp_context *psp, static int psp_load_smu_fw(struct psp_context *psp) { int ret; - struct amdgpu_device* adev = psp->adev; + struct amdgpu_device *adev = psp->adev; struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; struct amdgpu_ras *ras = psp->ras.ras; @@ -1893,7 +1925,8 @@ static int psp_load_smu_fw(struct psp_context *psp) return 0; - if (amdgpu_in_reset(adev) && ras && ras->supported) { + if (amdgpu_in_reset(adev) && ras && ras->supported && + adev->asic_type == CHIP_ARCTURUS) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) { DRM_WARN("Failed to set MP1 state prepare for reload\n"); @@ -1950,7 +1983,7 @@ static int psp_np_fw_load(struct psp_context *psp) { int i, ret; struct amdgpu_firmware_info *ucode; - struct amdgpu_device* adev = psp->adev; + struct amdgpu_device *adev = psp->adev; if (psp->autoload_supported && !psp->pmfw_centralized_cstate_management) { @@ -1974,8 +2007,8 @@ static int psp_np_fw_load(struct psp_context *psp) continue; if (psp->autoload_supported && - (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) && + (adev->asic_type >= CHIP_SIENNA_CICHLID && + adev->asic_type <= CHIP_DIMGREY_CAVEFISH) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3)) @@ -2390,7 +2423,7 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[30]; + char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *asd_hdr; int err = 0; @@ -2422,11 +2455,47 @@ out: return err; } -int psp_init_sos_microcode(struct psp_context *psp, +int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; char fw_name[30]; + const struct psp_firmware_header_v1_0 *toc_hdr; + int err = 0; + + if (!chip_name) { + dev_err(adev->dev, "invalid chip name for toc microcode\n"); + return -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", chip_name); + err = request_firmware(&adev->psp.toc_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.toc_fw); + if (err) + goto out; + + toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data; + adev->psp.toc_fw_version = le32_to_cpu(toc_hdr->header.ucode_version); + adev->psp.toc_feature_version = le32_to_cpu(toc_hdr->ucode_feature_version); + adev->psp.toc_bin_size = le32_to_cpu(toc_hdr->header.ucode_size_bytes); + adev->psp.toc_start_addr = (uint8_t 
*)toc_hdr + + le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes); + return 0; +out: + dev_err(adev->dev, "fail to request/validate toc microcode\n"); + release_firmware(adev->psp.toc_fw); + adev->psp.toc_fw = NULL; + return err; +} + +int psp_init_sos_microcode(struct psp_context *psp, + const char *chip_name) +{ + struct amdgpu_device *adev = psp->adev; + char fw_name[PSP_FW_NAME_LEN]; const struct psp_firmware_header_v1_0 *sos_hdr; const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; @@ -2505,9 +2574,9 @@ out: return err; } -int parse_ta_bin_descriptor(struct psp_context *psp, - const struct ta_fw_bin_desc *desc, - const struct ta_firmware_header_v2_0 *ta_hdr) +static int parse_ta_bin_descriptor(struct psp_context *psp, + const struct ta_fw_bin_desc *desc, + const struct ta_firmware_header_v2_0 *ta_hdr) { uint8_t *ucode_start_addr = NULL; @@ -2520,9 +2589,9 @@ int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_fw_version = le32_to_cpu(desc->fw_version); psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); psp->asd_start_addr = ucode_start_addr; psp->asd_fw = psp->ta_fw; break; @@ -2563,7 +2632,7 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[30]; + char fw_name[PSP_FW_NAME_LEN]; const struct ta_firmware_header_v2_0 *ta_hdr; int err = 0; int ta_index = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 919d2fb7427b..da250bc1ac57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -41,6 +41,7 @@ #define PSP_DTM_SHARED_MEM_SIZE 0x4000 #define PSP_RAP_SHARED_MEM_SIZE 0x4000 #define PSP_SHARED_MEM_SIZE 0x4000 +#define PSP_FW_NAME_LEN 0x24 struct psp_context; struct psp_xgmi_node_info; @@ -253,6 +254,11 @@ struct psp_context uint32_t asd_ucode_size; uint8_t *asd_start_addr; + /* toc firmware */ + const struct firmware *toc_fw; + uint32_t toc_fw_version; + uint32_t toc_feature_version; + /* fence buffer */ struct amdgpu_bo *fence_buf_bo; uint64_t fence_buf_mc_addr; @@ -386,8 +392,12 @@ int psp_ring_cmd_submit(struct psp_context *psp, int index); int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name); +int psp_init_toc_microcode(struct psp_context *psp, + const char *chip_name); int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name); int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name); +int psp_get_fw_attestation_records_addr(struct psp_context *psp, + uint64_t *output_ptr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 82cd8e55595a..c136bd449744 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -80,6 +80,8 @@ enum amdgpu_ras_retire_page_reservation { atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0); +static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, + uint64_t addr); static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr); @@ -516,9 +518,9 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, /* obj end */ static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev, - const char* 
invoke_type, - const char* block_name, - enum ta_ras_status ret) + const char* invoke_type, + const char* block_name, + enum ta_ras_status ret) { switch (ret) { case TA_RAS_STATUS__SUCCESS: @@ -607,7 +609,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!con) return -EINVAL; - info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); + info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); if (!info) return -ENOMEM; @@ -903,13 +905,6 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, return ret; } -int amdgpu_ras_error_cure(struct amdgpu_device *adev, - struct ras_cure_if *info) -{ - /* psp fw has no cure interface for now. */ - return 0; -} - /* get the total error counts on all IPs */ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce) @@ -953,7 +948,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) case AMDGPU_RAS_RETIRE_PAGE_FAULT: default: return "F"; - }; + } } /** @@ -1474,8 +1469,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) } /* Parse RdRspStatus and WrRspStatus */ -void amdgpu_ras_error_status_query(struct amdgpu_device *adev, - struct ras_query_if *info) +static void amdgpu_ras_error_status_query(struct amdgpu_device *adev, + struct ras_query_if *info) { /* * Only two block need to query read/write @@ -1548,10 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; - - if (data->last_reserved <= i) + ret = amdgpu_vram_mgr_query_page_status( + ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), + data->bps[i].retired_page); + if (ret == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; - else if (data->bps_bo[i] == NULL) + else if (ret == -ENOENT) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; } @@ -1603,12 +1600,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, unsigned int new_space = old_space + pages; unsigned int align_space = ALIGN(new_space, 512); void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); - struct amdgpu_bo **bps_bo = - kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL); - if (!bps || !bps_bo) { + if (!bps) { kfree(bps); - kfree(bps_bo); return -ENOMEM; } @@ -1617,14 +1611,8 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, data->count * sizeof(*data->bps)); kfree(data->bps); } - if (data->bps_bo) { - memcpy(bps_bo, data->bps_bo, - data->count * sizeof(*data->bps_bo)); - kfree(data->bps_bo); - } data->bps = bps; - data->bps_bo = bps_bo; data->space_left += align_space - old_space; return 0; } @@ -1636,6 +1624,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; int ret = 0; + uint32_t i; if (!con || !con->eh_data || !bps || pages <= 0) return 0; @@ -1645,16 +1634,26 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, if (!data) goto out; - if (data->space_left <= pages) - if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) { + for (i = 0; i < pages; i++) { + if (amdgpu_ras_check_bad_page_unlock(con, + bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + continue; + + if (!data->space_left && + amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { ret = -ENOMEM; goto out; } - memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps)); - data->count += pages; - data->space_left -= pages; + amdgpu_vram_mgr_reserve_range( + ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), + 
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE); + memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); + data->count++; + data->space_left--; + } out: mutex_unlock(&con->recovery_lock); @@ -1665,7 +1664,7 @@ out: * write error record array to eeprom, the function should be * protected by recovery_lock */ -static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; @@ -1727,6 +1726,20 @@ out: return ret; } +static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con, + uint64_t addr) +{ + struct ras_err_handler_data *data = con->eh_data; + int i; + + addr >>= AMDGPU_GPU_PAGE_SHIFT; + for (i = 0; i < data->count; i++) + if (addr == data->bps[i].retired_page) + return true; + + return false; +} + /* * check if an address belongs to bad page * @@ -1736,26 +1749,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev, uint64_t addr) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data *data; - int i; bool ret = false; if (!con || !con->eh_data) return ret; mutex_lock(&con->recovery_lock); - data = con->eh_data; - if (!data) - goto out; - - addr >>= AMDGPU_GPU_PAGE_SHIFT; - for (i = 0; i < data->count; i++) - if (addr == data->bps[i].retired_page) { - ret = true; - goto out; - } - -out: + ret = amdgpu_ras_check_bad_page_unlock(con, addr); mutex_unlock(&con->recovery_lock); return ret; } @@ -1801,80 +1801,6 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } -/* called in gpu recovery/init */ -int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data *data; - uint64_t bp; - struct amdgpu_bo *bo = NULL; - int i, ret = 0; - - /* Not reserve bad page when amdgpu_bad_page_threshold == 0. */ - if (!con || !con->eh_data || (amdgpu_bad_page_threshold == 0)) - return 0; - - mutex_lock(&con->recovery_lock); - data = con->eh_data; - if (!data) - goto out; - /* reserve vram at driver post stage. 
*/ - for (i = data->last_reserved; i < data->count; i++) { - bp = data->bps[i].retired_page; - - /* There are two cases of reserve error should be ignored: - * 1) a ras bad page has been allocated (used by someone); - * 2) a ras bad page has been reserved (duplicate error injection - * for one page); - */ - if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL)) - dev_warn(adev->dev, "RAS WARN: reserve vram for " - "retired page %llx fail\n", bp); - - data->bps_bo[i] = bo; - data->last_reserved = i + 1; - bo = NULL; - } - - /* continue to save bad pages to eeprom even reesrve_vram fails */ - ret = amdgpu_ras_save_bad_pages(adev); -out: - mutex_unlock(&con->recovery_lock); - return ret; -} - -/* called when driver unload */ -static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev) -{ - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data *data; - struct amdgpu_bo *bo; - int i; - - if (!con || !con->eh_data) - return 0; - - mutex_lock(&con->recovery_lock); - data = con->eh_data; - if (!data) - goto out; - - for (i = data->last_reserved - 1; i >= 0; i--) { - bo = data->bps_bo[i]; - - amdgpu_bo_free_kernel(&bo, NULL, NULL); - - data->bps_bo[i] = bo; - data->last_reserved = i; - } -out: - mutex_unlock(&con->recovery_lock); - return 0; -} - int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1914,18 +1840,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) ret = amdgpu_ras_load_bad_pages(adev); if (ret) goto free; - ret = amdgpu_ras_reserve_bad_pages(adev); - if (ret) - goto release; } return 0; -release: - amdgpu_ras_release_bad_pages(adev); free: kfree((*data)->bps); - kfree((*data)->bps_bo); kfree(*data); con->eh_data = NULL; out: @@ -1953,12 +1873,10 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) return 0; cancel_work_sync(&con->recovery_work); - amdgpu_ras_release_bad_pages(adev); mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); - kfree(data->bps_bo); kfree(data); mutex_unlock(&con->recovery_lock); @@ -2153,7 +2071,7 @@ void amdgpu_ras_late_fini(struct amdgpu_device *adev, amdgpu_ras_sysfs_remove(adev, ras_block); if (ih_info->cb) - amdgpu_ras_interrupt_remove_handler(adev, ih_info); + amdgpu_ras_interrupt_remove_handler(adev, ih_info); amdgpu_ras_feature_enable(adev, ras_block, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index ec398ed7deb8..762f5e46c007 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -33,7 +33,6 @@ #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) #define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1) -#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV (0x1 << 2) enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, @@ -363,14 +362,10 @@ struct ras_err_data { struct ras_err_handler_data { /* point to bad page records array */ struct eeprom_table_record *bps; - /* point to reserved bo array */ - struct amdgpu_bo **bps_bo; /* the count of entries */ int count; /* the space can place new entries */ int space_left; - /* last reserved entry's index + 1 */ - int last_reserved; }; typedef int (*ras_ih_cb)(struct amdgpu_device *adev, @@ -506,22 +501,12 @@ bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev); int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, struct eeprom_table_record *bps, int pages); -int 
amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev); +int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev); static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - /* - * Save bad page to eeprom before gpu reset, i2c may be unstable - * in gpu reset. - * - * Also, exclude the case when ras recovery issuer is - * eeprom page write itself. - */ - if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task()) - amdgpu_ras_reserve_bad_pages(adev); - if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) schedule_work(&ras->recovery_work); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 0e64c39a2372..1dd040166c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -27,9 +27,9 @@ #include <linux/bits.h> #include "atom.h" -#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 -#define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 +#define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 +#define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -124,11 +124,11 @@ static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_heade { uint32_t *pp = (uint32_t *)buff; - hdr->header = le32_to_cpu(pp[0]); - hdr->version = le32_to_cpu(pp[1]); + hdr->header = le32_to_cpu(pp[0]); + hdr->version = le32_to_cpu(pp[1]); hdr->first_rec_offset = le32_to_cpu(pp[2]); - hdr->tbl_size = le32_to_cpu(pp[3]); - hdr->checksum = le32_to_cpu(pp[4]); + hdr->tbl_size = le32_to_cpu(pp[3]); + hdr->checksum = le32_to_cpu(pp[4]); } static int __update_table_header(struct amdgpu_ras_eeprom_control *control, @@ -149,7 +149,11 @@ static int __update_table_header(struct amdgpu_ras_eeprom_control *control, msg.addr = control->i2c_address; + /* i2c may be unstable in gpu reset */ + down_read(&adev->reset_sem); ret = i2c_transfer(&adev->pm.smu_i2c, &msg, 1); + up_read(&adev->reset_sem); + if (ret < 1) DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); @@ -475,7 +479,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, int i, ret = 0; struct i2c_msg *msgs, *msg; unsigned char *buffs, *buff; - bool sched_ras_recovery = false; struct eeprom_table_record *record; struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -513,7 +516,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, "Saved bad pages(%d) reaches threshold value(%d).\n", control->num_recs + num, ras->bad_page_cnt_threshold); control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD; - sched_ras_recovery = true; } /* In case of overflow just start from beginning to not lose newest records */ @@ -557,7 +559,11 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, control->next_addr += EEPROM_TABLE_RECORD_SIZE; } + /* i2c may be unstable in gpu reset */ + down_read(&adev->reset_sem); ret = i2c_transfer(&adev->pm.smu_i2c, msgs, num); + up_read(&adev->reset_sem); + if (ret < 1) { DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret); @@ -595,20 +601,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, __update_tbl_checksum(control, records, num, old_hdr_byte_sum); __update_table_header(control, buffs); - - if (sched_ras_recovery) { - /* 
- * Before scheduling ras recovery, assert the related - * flag first, which shall bypass common bad page - * reservation execution in amdgpu_ras_reset_gpu. - */ - amdgpu_ras_get_context(adev)->flags |= - AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV; - - dev_warn(adev->dev, "Conduct ras recovery due to bad " - "page threshold reached.\n"); - amdgpu_ras_reset_gpu(adev); - } } else if (!__validate_tbl_checksum(control, records, num)) { DRM_WARN("EEPROM Table checksum mismatch!"); /* TODO Uncomment when EEPROM read/write is relliable */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 15ee13c3bd9e..1a612f51ecd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -52,7 +52,6 @@ /** * amdgpu_ring_alloc - allocate space on the ring buffer * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * @ndw: number of dwords to allocate in the ring buffer * @@ -95,7 +94,8 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) amdgpu_ring_write(ring, ring->funcs->nop); } -/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets +/** + * amdgpu_ring_generic_pad_ib - pad IB with NOP packets * * @ring: amdgpu_ring structure holding ring information * @ib: IB to add NOP packets to @@ -112,7 +112,6 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) * amdgpu_ring_commit - tell the GPU to execute the new * commands on the ring buffer * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * * Update the wptr (write pointer) to tell the GPU to @@ -155,8 +154,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information - * @max_ndw: maximum number of dw for ring alloc - * @nop: nop packet for this ring + * @max_dw: maximum number of dw for ring alloc + * @irq_src: interrupt source to use for this ring + * @irq_type: interrupt type to use for this ring + * @hw_prio: ring priority (NORMAL/HIGH) * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. @@ -276,7 +277,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /** * amdgpu_ring_fini - tear down the driver ring struct. * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * * Tear down the driver information for the selected ring (all asics). 
@@ -310,7 +310,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) /** * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper * - * @adev: amdgpu_device pointer + * @ring: ring to write to * @reg0: register to write * @reg1: register to wait on * @ref: reference value to write/wait on @@ -396,7 +396,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, return result; value = ring->ring[(*pos - 12)/4]; - r = put_user(value, (uint32_t*)buf); + r = put_user(value, (uint32_t *)buf); if (r) return r; buf += 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 0bd1d4ffc19e..524d10b21041 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -75,7 +75,7 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, } void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager) + struct amdgpu_sa_manager *sa_manager) { struct amdgpu_sa_bo *sa_bo, *tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 0da0a0d98672..b7d861ed5284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -29,7 +29,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" - +#include "amdgpu_sched.h" #include "amdgpu_vm.h" int amdgpu_to_sched_priority(int amdgpu_priority, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 250a309e4dee..de91d29c9d96 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -126,7 +126,7 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, goto free; } - return 0; + return 0; late_fini: amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h new file mode 100644 index 000000000000..03009157aec8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -0,0 +1,37 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_SMUIO_H__ +#define __AMDGPU_SMUIO_H__ + +struct amdgpu_smuio_funcs { + u32 (*get_rom_index_offset)(struct amdgpu_device *adev); + u32 (*get_rom_data_offset)(struct amdgpu_device *adev); + void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); +}; + +struct amdgpu_smuio { + const struct amdgpu_smuio_funcs *funcs; +}; + +#endif /* __AMDGPU_SMUIO_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 8ea6c49529e7..4e558632a5d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -78,7 +78,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, /** * amdgpu_sync_get_owner - extract the owner of a fence * - * @fence: fence get the owner from + * @f: fence get the owner from * * Extract who originally created the fence. */ @@ -172,7 +172,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) /** * amdgpu_sync_vm_fence - remember to sync to this VM fence * - * @adev: amdgpu device * @sync: sync object to add fence to * @fence: the VM fence to add * @@ -190,6 +189,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) /** * amdgpu_sync_resv - sync to a reservation object * + * @adev: amdgpu device * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence * @mode: how owner affects which fences we sync to diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 2f4d5ca9894f..7b230bcbf2c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -42,16 +42,11 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) size = 1024 * 1024; /* Number of tests = - * (Total GTT - IB pool - writeback page - ring buffers) / test size + * (Total GTT - gart_pin_size - (2 transfer windows for buffer moves)) / test size */ - n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - if (adev->rings[i]) - n -= adev->rings[i]->ring_size; - if (adev->wb.wb_obj) - n -= AMDGPU_GPU_PAGE_SIZE; - if (adev->irq.ih.ring_obj) - n -= adev->irq.ih.ring_size; + n = adev->gmc.gart_size - atomic64_read(&adev->gart_pin_size); + n -= AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS * + AMDGPU_GPU_PAGE_SIZE; n /= size; gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL); @@ -157,10 +152,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) i, *vram_start, gart_start, (unsigned long long) (gart_addr - adev->gmc.gart_start + - (void*)gart_start - gtt_map), + (void *)gart_start - gtt_map), (unsigned long long) (vram_addr - adev->gmc.vram_start + - (void*)gart_start - gtt_map)); + (void *)gart_start - gtt_map)); amdgpu_bo_kunmap(vram_obj); goto out_lclean_unpin; } @@ -203,10 +198,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) i, *gart_start, vram_start, (unsigned long long) (vram_addr - adev->gmc.vram_start + - (void*)vram_start - vram_map), + (void *)vram_start - vram_map), (unsigned long long) (gart_addr - adev->gmc.gart_start + - (void*)vram_start - vram_map)); + (void *)vram_start - vram_map)); amdgpu_bo_kunmap(gtt_obj[i]); goto out_lclean_unpin; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index ee9480d14cbc..324d5e3f3579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -366,15 +366,15 @@ TRACE_EVENT(amdgpu_vm_update_ptes, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint64_t flags, bool direct), - TP_ARGS(pe, addr, count, incr, flags, direct), + uint32_t incr, uint64_t flags, bool immediate), + TP_ARGS(pe, addr, count, incr, flags, immediate), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) __field(u64, flags) - __field(bool, direct) + __field(bool, immediate) ), TP_fast_assign( @@ -383,32 +383,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->count = count; __entry->incr = incr; __entry->flags = flags; - __entry->direct = direct; + __entry->immediate = immediate; ), TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, " - "direct=%d", __entry->pe, __entry->addr, __entry->incr, - __entry->flags, __entry->count, __entry->direct) + "immediate=%d", __entry->pe, __entry->addr, __entry->incr, + __entry->flags, __entry->count, __entry->immediate) ); TRACE_EVENT(amdgpu_vm_copy_ptes, - TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct), - TP_ARGS(pe, src, count, direct), + TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool immediate), + TP_ARGS(pe, src, count, immediate), TP_STRUCT__entry( __field(u64, pe) __field(u64, src) __field(u32, count) - __field(bool, direct) + __field(bool, immediate) ), TP_fast_assign( __entry->pe = pe; __entry->src = src; __entry->count = count; - __entry->direct = direct; + __entry->immediate = immediate; ), - TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d", + TP_printk("pe=%010Lx, src=%010Lx, count=%u, immediate=%d", __entry->pe, __entry->src, __entry->count, - __entry->direct) + __entry->immediate) ); TRACE_EVENT(amdgpu_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a0248d78190f..b848f9e97613 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -47,7 +47,6 @@ #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_module.h> -#include <drm/ttm/ttm_page_alloc.h> #include <drm/drm_debugfs.h> #include <drm/amdgpu_drm.h> @@ -66,6 +65,8 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev, struct ttm_tt *ttm, struct ttm_resource *bo_mem); +static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev, + struct ttm_tt *ttm); static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev, unsigned int type, @@ -92,7 +93,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, .fpfn = 0, .lpfn = 0, .mem_type = TTM_PL_SYSTEM, - .flags = TTM_PL_MASK_CACHING + .flags = 0 }; /* Don't handle scatter gather BOs */ @@ -292,11 +293,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, cpu_addr = &job->ibs[0].ptr[num_dw]; if (mem->mem_type == TTM_PL_TT) { - struct ttm_dma_tt *dma; dma_addr_t *dma_address; - dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); - dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; + dma_address = &bo->ttm->dma_address[offset >> PAGE_SHIFT]; r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, cpu_addr); if (r) @@ -452,7 +451,7 @@ error: return r; } -/** +/* * amdgpu_move_blit - Copy an entire buffer to another buffer * * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to @@ -513,116 +512,7 @@ error: return r; } -/** - * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer - * - * Called by amdgpu_bo_move(). 
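Editor's sketch (not part of the patch): with the ttm_dma_tt wrapper removed, the per-page DMA addresses are reached directly through struct ttm_tt, exactly as the amdgpu_ttm_map_buffer() hunk above now does. A minimal illustration of that access pattern, assuming it sits inside amdgpu_ttm.c where the TTM headers are already included:

static dma_addr_t example_gtt_page_dma_addr(struct ttm_buffer_object *bo,
					    uint64_t offset)
{
	/* struct ttm_tt now carries the dma_address array itself */
	return bo->ttm->dma_address[offset >> PAGE_SHIFT];
}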
- */ -static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, - struct ttm_operation_ctx *ctx, - struct ttm_resource *new_mem) -{ - struct ttm_resource *old_mem = &bo->mem; - struct ttm_resource tmp_mem; - struct ttm_place placements; - struct ttm_placement placement; - int r; - - /* create space/pages for new_mem in GTT space */ - tmp_mem = *new_mem; - tmp_mem.mm_node = NULL; - placement.num_placement = 1; - placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; - placements.fpfn = 0; - placements.lpfn = 0; - placements.mem_type = TTM_PL_TT; - placements.flags = TTM_PL_MASK_CACHING; - r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); - if (unlikely(r)) { - pr_err("Failed to find GTT space for blit from VRAM\n"); - return r; - } - - /* set caching flags */ - r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); - if (unlikely(r)) { - goto out_cleanup; - } - - r = ttm_tt_populate(bo->bdev, bo->ttm, ctx); - if (unlikely(r)) - goto out_cleanup; - - /* Bind the memory to the GTT space */ - r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, &tmp_mem); - if (unlikely(r)) { - goto out_cleanup; - } - - /* blit VRAM to GTT */ - r = amdgpu_move_blit(bo, evict, &tmp_mem, old_mem); - if (unlikely(r)) { - goto out_cleanup; - } - - /* move BO (in tmp_mem) to new_mem */ - r = ttm_bo_move_ttm(bo, ctx, new_mem); -out_cleanup: - ttm_resource_free(bo, &tmp_mem); - return r; -} - -/** - * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM - * - * Called by amdgpu_bo_move(). - */ -static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, - struct ttm_operation_ctx *ctx, - struct ttm_resource *new_mem) -{ - struct ttm_resource *old_mem = &bo->mem; - struct ttm_resource tmp_mem; - struct ttm_placement placement; - struct ttm_place placements; - int r; - - /* make space in GTT for old_mem buffer */ - tmp_mem = *new_mem; - tmp_mem.mm_node = NULL; - placement.num_placement = 1; - placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; - placements.fpfn = 0; - placements.lpfn = 0; - placements.mem_type = TTM_PL_TT; - placements.flags = TTM_PL_MASK_CACHING; - r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); - if (unlikely(r)) { - pr_err("Failed to find GTT space for blit to VRAM\n"); - return r; - } - - /* move/bind old memory to GTT space */ - r = ttm_bo_move_ttm(bo, ctx, &tmp_mem); - if (unlikely(r)) { - goto out_cleanup; - } - - /* copy to VRAM */ - r = amdgpu_move_blit(bo, evict, new_mem, old_mem); - if (unlikely(r)) { - goto out_cleanup; - } -out_cleanup: - ttm_resource_free(bo, &tmp_mem); - return r; -} - -/** +/* * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy * * Called by amdgpu_bo_move() @@ -646,23 +536,43 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev, <= adev->gmc.visible_vram_size; } -/** +/* * amdgpu_bo_move - Move a buffer object to a new memory location * * Called by ttm_bo_handle_move_mem() */ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, - struct ttm_resource *new_mem) + struct ttm_resource *new_mem, + struct ttm_place *hop) { struct amdgpu_device *adev; struct amdgpu_bo *abo; struct ttm_resource *old_mem = &bo->mem; int r; + if ((old_mem->mem_type == TTM_PL_SYSTEM && + new_mem->mem_type == TTM_PL_VRAM) || + (old_mem->mem_type == TTM_PL_VRAM && + new_mem->mem_type == TTM_PL_SYSTEM)) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = 
TTM_PL_TT; + hop->flags = 0; + return -EMULTIHOP; + } + + if (new_mem->mem_type == TTM_PL_TT) { + r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem); + if (r) + return r; + } + + amdgpu_bo_move_notify(bo, evict, new_mem); + /* Can't move a pinned BO */ abo = ttm_to_amdgpu_bo(bo); - if (WARN_ON_ONCE(abo->pin_count > 0)) + if (WARN_ON_ONCE(abo->tbo.pin_count > 0)) return -EINVAL; adev = amdgpu_ttm_adev(bo->bdev); @@ -671,14 +581,24 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, ttm_bo_move_null(bo, new_mem); return 0; } - if ((old_mem->mem_type == TTM_PL_TT && - new_mem->mem_type == TTM_PL_SYSTEM) || - (old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_TT)) { - /* bind is enough */ + if (old_mem->mem_type == TTM_PL_SYSTEM && + new_mem->mem_type == TTM_PL_TT) { ttm_bo_move_null(bo, new_mem); return 0; } + + if (old_mem->mem_type == TTM_PL_TT && + new_mem->mem_type == TTM_PL_SYSTEM) { + r = ttm_bo_wait_ctx(bo, ctx); + if (r) + goto fail; + + amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); + ttm_resource_free(bo, &bo->mem); + ttm_bo_assign_mem(bo, new_mem); + return 0; + } + if (old_mem->mem_type == AMDGPU_PL_GDS || old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || @@ -695,29 +615,19 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, goto memcpy; } - if (old_mem->mem_type == TTM_PL_VRAM && - new_mem->mem_type == TTM_PL_SYSTEM) { - r = amdgpu_move_vram_ram(bo, evict, ctx, new_mem); - } else if (old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_VRAM) { - r = amdgpu_move_ram_vram(bo, evict, ctx, new_mem); - } else { - r = amdgpu_move_blit(bo, evict, - new_mem, old_mem); - } - + r = amdgpu_move_blit(bo, evict, new_mem, old_mem); if (r) { memcpy: /* Check that all memory is CPU accessible */ if (!amdgpu_mem_visible(adev, old_mem) || !amdgpu_mem_visible(adev, new_mem)) { pr_err("Move buffer fallback to memcpy unavailable\n"); - return r; + goto fail; } r = ttm_bo_move_memcpy(bo, ctx, new_mem); if (r) - return r; + goto fail; } if (bo->type == ttm_bo_type_device && @@ -732,9 +642,14 @@ memcpy: /* update statistics */ atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &adev->num_bytes_moved); return 0; +fail: + swap(*new_mem, bo->mem); + amdgpu_bo_move_notify(bo, false, new_mem); + swap(*new_mem, bo->mem); + return r; } -/** +/* * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault * * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() @@ -767,6 +682,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_reso mem->bus.offset += adev->gmc.aper_base; mem->bus.is_iomem = true; + mem->bus.caching = ttm_write_combined; break; default: return -EINVAL; @@ -811,7 +727,7 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type) * TTM backend functions. 
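Editor's sketch (not part of the patch): the bounce helpers amdgpu_move_vram_ram()/amdgpu_move_ram_vram() are deleted because VRAM<->SYSTEM transfers are now bounced through GTT by TTM's multihop machinery, as the amdgpu_bo_move() hunk above shows. The general shape a driver's .move callback takes after this series, written out as an illustrative stand-alone example:

static int example_bo_move(struct ttm_buffer_object *bo, bool evict,
			   struct ttm_operation_ctx *ctx,
			   struct ttm_resource *new_mem,
			   struct ttm_place *hop)
{
	struct ttm_resource *old_mem = &bo->mem;

	if ((old_mem->mem_type == TTM_PL_SYSTEM &&
	     new_mem->mem_type == TTM_PL_VRAM) ||
	    (old_mem->mem_type == TTM_PL_VRAM &&
	     new_mem->mem_type == TTM_PL_SYSTEM)) {
		/* ask TTM to perform the move in two hops via a GTT placement */
		hop->fpfn = 0;
		hop->lpfn = 0;
		hop->mem_type = TTM_PL_TT;
		hop->flags = 0;
		return -EMULTIHOP;
	}

	/* ... direct SYSTEM<->TT transitions and blit moves handled here ... */
	return 0;
}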
*/ struct amdgpu_ttm_tt { - struct ttm_dma_tt ttm; + struct ttm_tt ttm; struct drm_gem_object *gobj; u64 offset; uint64_t userptr; @@ -824,7 +740,7 @@ struct amdgpu_ttm_tt { }; #ifdef CONFIG_DRM_AMDGPU_USERPTR -/** +/* * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update * @@ -929,7 +845,7 @@ out: return r; } -/** +/* * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change * Check if the pages backing this ttm range have been invalidated * @@ -943,7 +859,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) if (!gtt || !gtt->userptr) return false; - DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", + DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", gtt->userptr, ttm->num_pages); WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns, @@ -965,7 +881,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) } #endif -/** +/* * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * * Called by amdgpu_cs_list_validate(). This creates the page list @@ -980,8 +896,8 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) ttm->pages[i] = pages ? pages[i] : NULL; } -/** - * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages +/* + * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages * * Called by amdgpu_ttm_backend_bind() **/ @@ -1020,7 +936,7 @@ release_sg: return r; } -/** +/* * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages */ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_bo_device *bdev, @@ -1095,13 +1011,13 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, gart_bind_fail: if (r) - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + DRM_ERROR("failed to bind %u pages at 0x%08llX\n", ttm->num_pages, gtt->offset); return r; } -/** +/* * amdgpu_ttm_backend_bind - Bind GTT memory * * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). @@ -1130,7 +1046,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev, } } if (!ttm->num_pages) { - WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", + WARN(1, "nothing to bind %u pages for mreg %p back %p!\n", ttm->num_pages, bo_mem, ttm); } @@ -1153,13 +1069,13 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev, ttm->pages, gtt->ttm.dma_address, flags); if (r) - DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", + DRM_ERROR("failed to bind %u pages at 0x%08llX\n", ttm->num_pages, gtt->offset); gtt->bound = true; return r; } -/** +/* * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either * through AGP or GART aperture. 
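Editor's sketch (not part of the patch): struct amdgpu_ttm_tt now embeds a plain struct ttm_tt instead of a struct ttm_dma_tt, so the driver-private state is one container_of() away from the core object. The patch keeps the historical (void *) casts, which rely on ttm being the first member; an explicit equivalent would be:

static struct amdgpu_ttm_tt *example_to_amdgpu_ttm_tt(struct ttm_tt *ttm)
{
	/* equivalent to the (void *)ttm casts used throughout amdgpu_ttm.c */
	return container_of(ttm, struct amdgpu_ttm_tt, ttm);
}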
* @@ -1171,7 +1087,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_ttm_tt *gtt = (void*)bo->ttm; + struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; struct ttm_resource tmp; struct ttm_placement placement; struct ttm_place placements; @@ -1220,7 +1136,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; } -/** +/* * amdgpu_ttm_recover_gart - Rebind GTT pages * * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to @@ -1241,7 +1157,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) return r; } -/** +/* * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages * * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and @@ -1267,8 +1183,8 @@ static void amdgpu_ttm_backend_unbind(struct ttm_bo_device *bdev, /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); if (r) - DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", - gtt->ttm.ttm.num_pages, gtt->offset); + DRM_ERROR("failed to unbind %u pages at 0x%08llX\n", + gtt->ttm.num_pages, gtt->offset); gtt->bound = false; } @@ -1282,7 +1198,7 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev, if (gtt->usertask) put_task_struct(gtt->usertask); - ttm_dma_tt_fini(>t->ttm); + ttm_tt_fini(>t->ttm); kfree(gtt); } @@ -1290,13 +1206,16 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev, * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO * * @bo: The buffer object to create a GTT ttm_tt object around + * @page_flags: Page flags to be added to the ttm_tt object * * Called by ttm_tt_create(). */ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_ttm_tt *gtt; + enum ttm_caching caching; gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { @@ -1304,15 +1223,20 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, } gtt->gobj = &bo->base; + if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + caching = ttm_write_combined; + else + caching = ttm_cached; + /* allocate space for the uninitialized page entries */ - if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { + if (ttm_sg_tt_init(>t->ttm, bo, page_flags, caching)) { kfree(gtt); return NULL; } - return >t->ttm.ttm; + return >t->ttm; } -/** +/* * amdgpu_ttm_tt_populate - Map GTT pages visible to the device * * Map the pages of a ttm_tt object to an address space visible @@ -1332,7 +1256,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev, return -ENOMEM; ttm->page_flags |= TTM_PAGE_FLAG_SG; - ttm_tt_set_populated(ttm); return 0; } @@ -1352,28 +1275,20 @@ static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev, drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); - ttm_tt_set_populated(ttm); return 0; } -#ifdef CONFIG_SWIOTLB - if (adev->need_swiotlb && swiotlb_nr_tbl()) { - return ttm_dma_populate(>t->ttm, adev->dev, ctx); - } -#endif - - /* fall back to generic helper to populate the page array - * and map them to the device */ - return ttm_populate_and_map_pages(adev->dev, >t->ttm, ctx); + return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); } -/** +/* * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays * * Unmaps pages of a ttm_tt object from the device address space and * unpopulates the page 
array backing it. */ -static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt *ttm) +static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, + struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct amdgpu_device *adev; @@ -1398,16 +1313,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_bo_device *bdev, struct ttm_tt * return; adev = amdgpu_ttm_adev(bdev); - -#ifdef CONFIG_SWIOTLB - if (adev->need_swiotlb && swiotlb_nr_tbl()) { - ttm_dma_unpopulate(>t->ttm, adev->dev); - return; - } -#endif - - /* fall back to generic helper to unmap and unpopulate array */ - ttm_unmap_and_unpopulate_pages(adev->dev, >t->ttm); + return ttm_pool_free(&adev->mman.bdev.pool, ttm); } /** @@ -1433,7 +1339,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, return -ENOMEM; } - gtt = (void*)bo->ttm; + gtt = (void *)bo->ttm; gtt->userptr = addr; gtt->userflags = flags; @@ -1445,7 +1351,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, return 0; } -/** +/* * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) @@ -1461,7 +1367,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) return gtt->usertask->mm; } -/** +/* * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an * address range for the current task. * @@ -1478,14 +1384,14 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, /* Return false if no part of the ttm_tt object lies within * the range */ - size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; + size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE; if (gtt->userptr > end || gtt->userptr + size <= start) return false; return true; } -/** +/* * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) @@ -1498,7 +1404,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) return true; } -/** +/* * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only? */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) @@ -1529,7 +1435,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) if (mem && mem->mem_type == TTM_PL_TT) { flags |= AMDGPU_PTE_SYSTEM; - if (ttm->caching_state == tt_cached) + if (ttm->caching == ttm_cached) flags |= AMDGPU_PTE_SNOOPED; } @@ -1539,9 +1445,10 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) /** * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object * + * @adev: amdgpu_device pointer * @ttm: The ttm_tt object to compute the flags for * @mem: The memory registry backing this ttm_tt object - + * * Figure out the flags to use for a VM PTE (Page Table Entry). */ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, @@ -1558,7 +1465,7 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; } -/** +/* * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer * object. 
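Editor's sketch (not part of the patch): two of the hunks above are connected. amdgpu_ttm_tt_create() now picks the ttm caching mode from the BO's AMDGPU_GEM_CREATE_CPU_GTT_USWC flag, and amdgpu_ttm_tt_pde_flags() later turns that choice into GPU page-table bits, setting SNOOPED only for CPU-cached system pages. A condensed illustration of that relationship, assuming the usual amdgpu PTE defines:

static uint64_t example_system_pte_flags(struct ttm_tt *ttm)
{
	uint64_t flags = AMDGPU_PTE_SYSTEM;

	/* only CPU-cached GTT pages need snooping; write-combined
	 * (USWC) pages are mapped without it */
	if (ttm->caching == ttm_cached)
		flags |= AMDGPU_PTE_SNOOPED;

	return flags;
}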
* @@ -1699,20 +1606,23 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, return ret; } +static void +amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) +{ + amdgpu_bo_move_notify(bo, false, NULL); +} + static struct ttm_bo_driver amdgpu_bo_driver = { .ttm_tt_create = &amdgpu_ttm_tt_create, .ttm_tt_populate = &amdgpu_ttm_tt_populate, .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate, - .ttm_tt_bind = &amdgpu_ttm_backend_bind, - .ttm_tt_unbind = &amdgpu_ttm_backend_unbind, .ttm_tt_destroy = &amdgpu_ttm_backend_destroy, .eviction_valuable = amdgpu_ttm_bo_eviction_valuable, .evict_flags = &amdgpu_evict_flags, .move = &amdgpu_bo_move, .verify_access = &amdgpu_verify_access, - .move_notify = &amdgpu_bo_move_notify, + .delete_mem_notify = &amdgpu_bo_delete_mem_notify, .release_notify = &amdgpu_bo_release_notify, - .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, .access_memory = &amdgpu_ttm_access_memory, @@ -1866,7 +1776,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) return 0; } -/** +/* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. * @@ -1884,10 +1794,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); /* No others user of address space so set it to 0 */ - r = ttm_bo_device_init(&adev->mman.bdev, - &amdgpu_bo_driver, + r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev, adev_to_drm(adev)->anon_inode->i_mapping, adev_to_drm(adev)->vma_offset_manager, + adev->need_swiotlb, dma_addressing_limited(adev->dev)); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); @@ -1895,9 +1805,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } adev->mman.initialized = true; - /* We opt to avoid OOM on system pages allocations */ - adev->mman.bdev.no_retry = true; - /* Initialize VRAM pool with all of VRAM divided into pages */ r = amdgpu_vram_mgr_init(adev); if (r) { @@ -2003,18 +1910,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm - */ -void amdgpu_ttm_late_init(struct amdgpu_device *adev) -{ - /* return the VGA stolen memory (if any) back to VRAM */ - if (!adev->mman.keep_stolen_vga_memory) - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); -} - -/** +/* * amdgpu_ttm_fini - De-initialize the TTM memory pools */ void amdgpu_ttm_fini(struct amdgpu_device *adev) @@ -2024,8 +1920,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ - if (adev->mman.keep_stolen_vga_memory) - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); /* return the IP Discovery TMR memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -2092,15 +1988,48 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) adev->mman.buffer_funcs_enabled = enable; } +static vm_fault_t amdgpu_ttm_fault(struct vm_fault *vmf) +{ + struct ttm_buffer_object *bo = vmf->vma->vm_private_data; + vm_fault_t ret; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return 
ret; + + ret = amdgpu_bo_fault_reserve_notify(bo); + if (ret) + goto unlock; + + ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, + TTM_BO_VM_NUM_PREFAULT, 1); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + +unlock: + dma_resv_unlock(bo->base.resv); + return ret; +} + +static struct vm_operations_struct amdgpu_ttm_vm_ops = { + .fault = amdgpu_ttm_fault, + .open = ttm_bo_vm_open, + .close = ttm_bo_vm_close, + .access = ttm_bo_vm_access +}; + int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) { struct drm_file *file_priv = filp->private_data; struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev); + int r; - if (adev == NULL) - return -EINVAL; + r = ttm_bo_mmap(filp, vma, &adev->mman.bdev); + if (unlikely(r != 0)) + return r; - return ttm_bo_mmap(filp, vma, &adev->mman.bdev); + vma->vm_ops = &amdgpu_ttm_vm_ops; + return 0; } int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, @@ -2284,19 +2213,25 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) return 0; } +static int amdgpu_ttm_pool_debugfs(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + + return ttm_pool_debugfs(&adev->mman.bdev.pool, m); +} + static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM}, {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT}, {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS}, {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS}, {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA}, - {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, -#ifdef CONFIG_SWIOTLB - {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL} -#endif + {"ttm_page_pool", amdgpu_ttm_pool_debugfs, 0, NULL}, }; -/** +/* * amdgpu_ttm_vram_read - Linear read access to VRAM * * Accesses VRAM via MMIO for debugging purposes. @@ -2331,7 +2266,7 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, return result; } -/** +/* * amdgpu_ttm_vram_write - Linear write access to VRAM * * Accesses VRAM via MMIO for debugging purposes. 
@@ -2384,7 +2319,7 @@ static const struct file_operations amdgpu_ttm_vram_fops = { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS -/** +/* * amdgpu_ttm_gtt_read - Linear read access to GTT memory */ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, @@ -2434,7 +2369,7 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif -/** +/* * amdgpu_iomem_read - Virtual read access to GPU mapped memory * * This function is used to read memory that has been mapped to the @@ -2490,7 +2425,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, return result; } -/** +/* * amdgpu_iomem_write - Virtual write access to GPU mapped memory * * This function is used to write memory that has been mapped to the @@ -2586,12 +2521,6 @@ int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) } count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); - -#ifdef CONFIG_SWIOTLB - if (!(adev->need_swiotlb && swiotlb_nr_tbl())) - --count; -#endif - return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count); #else return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index a87951b2f06d..d2987536d7cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -37,10 +37,17 @@ #define AMDGPU_POISON 0xd0bed0be +struct amdgpu_vram_reservation { + struct list_head node; + struct drm_mm_node mm_node; +}; + struct amdgpu_vram_mgr { struct ttm_resource_manager manager; struct drm_mm mm; spinlock_t lock; + struct list_head reservations_pending; + struct list_head reserved_pages; atomic64_t usage; atomic64_t vis_usage; }; @@ -54,7 +61,6 @@ struct amdgpu_gtt_mgr { struct amdgpu_mman { struct ttm_bo_device bdev; - bool mem_global_referenced; bool initialized; void __iomem *aper_base_kaddr; @@ -119,9 +125,12 @@ void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev, struct sg_table *sgt); uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man); +int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, + uint64_t start, uint64_t size); +int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, + uint64_t start); int amdgpu_ttm_init(struct amdgpu_device *adev); -void amdgpu_ttm_late_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index b313ce4c3e97..1beb08af347f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -68,23 +68,32 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr) { uint16_t version_major = le16_to_cpu(hdr->header_version_major); uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); + const struct smc_firmware_header_v1_0 *v1_0_hdr; + const struct smc_firmware_header_v2_0 *v2_0_hdr; + const struct smc_firmware_header_v2_1 *v2_1_hdr; DRM_DEBUG("SMC\n"); amdgpu_ucode_print_common_hdr(hdr); if (version_major == 1) { - const struct smc_firmware_header_v1_0 *smc_hdr = - container_of(hdr, struct smc_firmware_header_v1_0, header); - - DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(smc_hdr->ucode_start_addr)); + v1_0_hdr = container_of(hdr, struct smc_firmware_header_v1_0, header); + DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(v1_0_hdr->ucode_start_addr)); } else if (version_major == 2) { - 
const struct smc_firmware_header_v1_0 *v1_hdr = - container_of(hdr, struct smc_firmware_header_v1_0, header); - const struct smc_firmware_header_v2_0 *v2_hdr = - container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); + switch (version_minor) { + case 0: + v2_0_hdr = container_of(hdr, struct smc_firmware_header_v2_0, v1_0.header); + DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_offset_bytes)); + DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_size_bytes)); + break; + case 1: + v2_1_hdr = container_of(hdr, struct smc_firmware_header_v2_1, v1_0.header); + DRM_DEBUG("pptable_count: %u\n", le32_to_cpu(v2_1_hdr->pptable_count)); + DRM_DEBUG("pptable_entry_offset: %u\n", le32_to_cpu(v2_1_hdr->pptable_entry_offset)); + break; + default: + break; + } - DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); - DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); } else { DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor); } @@ -391,6 +400,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else @@ -586,8 +597,8 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, { const struct gfx_firmware_header_v1_0 *header = NULL; const struct common_firmware_header *comm_hdr = NULL; - uint8_t* src_addr = NULL; - uint8_t* dst_addr = NULL; + uint8_t *src_addr = NULL; + uint8_t *dst_addr = NULL; if (NULL == ucode->fw) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 262baf0f61ea..a2975c8092a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -126,10 +126,11 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, err_data->ue_count); if ((amdgpu_bad_page_threshold != 0) && - err_data->err_addr_cnt && + err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, - err_data->err_addr_cnt)) - dev_warn(adev->dev, "Failed to add ras bad page!\n"); + err_data->err_addr_cnt); + amdgpu_ras_save_bad_pages(adev); + } amdgpu_ras_reset_gpu(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index f8bebf18ee36..7c5b60e53482 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -87,7 +87,7 @@ #define UVD_NO_OP 0x03ff #define UVD_BASE_SI 0x3800 -/** +/* * amdgpu_uvd_cs_ctx - Command submission parser context * * Used for emulating virtual memory support on UVD 4.2. @@ -545,8 +545,9 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) /** * amdgpu_uvd_cs_msg_decode - handle UVD decode message * + * @adev: amdgpu_device pointer * @msg: pointer to message structure - * @buf_sizes: returned buffer sizes + * @buf_sizes: placeholder to put the different buffer lengths * * Peek into the decode message and calculate the necessary buffer sizes. */ @@ -1005,6 +1006,7 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, * amdgpu_uvd_ring_parse_cs - UVD command submission parser * * @parser: Command submission parser context + * @ib_idx: Which indirect buffer to use * * Parse the command stream, patch in addresses as necessary. 
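Editor's sketch (not part of the patch): the amdgpu_ttm.h hunk a little earlier declares a new VRAM manager reservation API, amdgpu_vram_mgr_reserve_range()/amdgpu_vram_mgr_query_page_status(), alongside the RAS bad-page rework shown in amdgpu_umc.c above. As a purely hypothetical illustration of how such an API could be driven to keep a known-bad VRAM page out of future allocations (the caller below is invented for illustration, not taken from the patch):

static int example_retire_vram_page(struct amdgpu_device *adev, uint64_t addr)
{
	struct ttm_resource_manager *man =
		ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);

	/* hypothetical: reserve the page so the VRAM manager never hands
	 * it out again */
	return amdgpu_vram_mgr_reserve_range(man, addr & PAGE_MASK, PAGE_SIZE);
}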
*/ @@ -1279,6 +1281,7 @@ void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) * amdgpu_uvd_ring_test_ib - test ib execution * * @ring: amdgpu_ring pointer + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test if we can successfully execute an IB */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ecaa2d7483b2..9791a4057e8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -90,6 +90,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, * amdgpu_vce_init - allocate memory, load vce firmware * * @adev: amdgpu_device pointer + * @size: size for the new BO * * First step to get VCE online, allocate memory and load the firmware */ @@ -428,9 +429,9 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) /** * amdgpu_vce_get_create_msg - generate a VCE create msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: VCE session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test @@ -509,9 +510,9 @@ err: /** * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: VCE session handle to use + * @direct: direct or delayed pool * @fence: optional fence to return * * Close up a stream for HW test or if userspace failed to do so @@ -576,6 +577,7 @@ err: * amdgpu_vce_cs_validate_bo - make sure not to cross 4GB boundary * * @p: parser context + * @ib_idx: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size @@ -625,9 +627,11 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, * amdgpu_vce_cs_reloc - command submission relocation * * @p: parser context + * @ib_idx: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size + * @index: bs/fb index * * Patch relocation inside command stream with real buffer address */ @@ -714,7 +718,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, * amdgpu_vce_cs_parse - parse and validate the command stream * * @p: parser context - * + * @ib_idx: indirect buffer to use */ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) { @@ -950,7 +954,7 @@ out: * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode * * @p: parser context - * + * @ib_idx: indirect buffer to use */ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) { @@ -1040,7 +1044,9 @@ out: * amdgpu_vce_ring_emit_ib - execute indirect buffer * * @ring: engine to use + * @job: job to retrieve vmid from * @ib: the IB to execute + * @flags: unused * */ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, @@ -1058,7 +1064,9 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, * amdgpu_vce_ring_emit_fence - add a fence command to the ring * * @ring: engine to use - * @fence: the fence + * @addr: address + * @seq: sequence number + * @flags: fence related flags * */ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -1116,6 +1124,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) * amdgpu_vce_ring_test_ib - test if VCE IBs are working * * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * */ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long 
timeout) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index a563328e3dae..1c97244e0d74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -37,14 +37,16 @@ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" #define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin" #define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" -#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin" -#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin" -#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin" -#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" -#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" -#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" -#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin" -#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin" +#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin" +#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin" +#define FIRMWARE_GREEN_SARDINE "amdgpu/green_sardine_vcn.bin" +#define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" +#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" +#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" +#define FIRMWARE_SIENNA_CICHLID "amdgpu/sienna_cichlid_vcn.bin" +#define FIRMWARE_NAVY_FLOUNDER "amdgpu/navy_flounder_vcn.bin" +#define FIRMWARE_VANGOGH "amdgpu/vangogh_vcn.bin" +#define FIRMWARE_DIMGREY_CAVEFISH "amdgpu/dimgrey_cavefish_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -57,6 +59,8 @@ MODULE_FIRMWARE(FIRMWARE_NAVI14); MODULE_FIRMWARE(FIRMWARE_NAVI12); MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID); MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER); +MODULE_FIRMWARE(FIRMWARE_VANGOGH); +MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -130,6 +134,15 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case CHIP_VANGOGH: + fw_name = FIRMWARE_VANGOGH; + break; + case CHIP_DIMGREY_CAVEFISH: + fw_name = FIRMWARE_DIMGREY_CAVEFISH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -319,6 +332,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, vcn.idle_work.work); unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; unsigned int i, j; + int r = 0; for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) @@ -345,8 +359,13 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) } if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) { + amdgpu_gfx_off_ctrl(adev, true); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); } else { schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); } @@ -355,9 +374,17 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + int r = 0; atomic_inc(&adev->vcn.total_submission_cnt); - cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) { + amdgpu_gfx_off_ctrl(adev, false); + r = amdgpu_dpm_switch_power_profile(adev, 
PP_SMC_POWER_PROFILE_VIDEO, + true); + if (r) + dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + } mutex_lock(&adev->vcn.vcn_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -429,6 +456,37 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) return r; } +int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t rptr; + unsigned int i; + int r; + + if (amdgpu_sriov_vf(adev)) + return 0; + + r = amdgpu_ring_alloc(ring, 16); + if (r) + return r; + + rptr = amdgpu_ring_get_rptr(ring); + + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (amdgpu_ring_get_rptr(ring) != rptr) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, struct dma_fence **fence) @@ -483,16 +541,16 @@ err: } static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo **bo) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; + *bo = NULL; r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, (void **)&msg); + bo, NULL, (void **)&msg); if (r) return r; @@ -513,20 +571,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = 14; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, fence); + return 0; } static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo **bo) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; + *bo = NULL; r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, (void **)&msg); + bo, NULL, (void **)&msg); if (r) return r; @@ -539,19 +597,117 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = 6; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, fence); + return 0; } int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) { - struct dma_fence *fence; + struct dma_fence *fence = NULL; + struct amdgpu_bo *bo; long r; - r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo); + if (r) + goto error; + + r = amdgpu_vcn_dec_send_msg(ring, bo, NULL); + if (r) + goto error; + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo); + if (r) + goto error; + + r = amdgpu_vcn_dec_send_msg(ring, bo, &fence); + if (r) + goto error; + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) + r = -ETIMEDOUT; + else if (r > 0) + r = 0; + + dma_fence_put(fence); +error: + return r; +} + +static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, + struct amdgpu_bo *bo, + struct dma_fence **fence) +{ + struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + const unsigned int ib_size_dw = 64; + struct amdgpu_device *adev = ring->adev; + struct dma_fence *f = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + uint64_t addr; + int i, r; + + r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); + if (r) + goto err; + + ib = &job->ibs[0]; + addr = amdgpu_bo_gpu_offset(bo); + ib->length_dw = 0; + + ib->ptr[ib->length_dw++] = sizeof(struct 
amdgpu_vcn_decode_buffer) + 8; + ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); + ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4; + memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer)); + + decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER); + decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32); + decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr); + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err_free; + + amdgpu_bo_fence(bo, f, false); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + +err: + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; +} + +int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence = NULL; + struct amdgpu_bo *bo; + long r; + + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo); + if (r) + goto error; + + r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL); + if (r) + goto error; + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo); if (r) goto error; - r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); + r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 17691158f783..13aa417f6be7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -44,6 +44,17 @@ #define VCN_DEC_CMD_PACKET_START 0x0000000a #define VCN_DEC_CMD_PACKET_END 0x0000000b +#define VCN_DEC_SW_CMD_NO_OP 0x00000000 +#define VCN_DEC_SW_CMD_END 0x00000001 +#define VCN_DEC_SW_CMD_IB 0x00000002 +#define VCN_DEC_SW_CMD_FENCE 0x00000003 +#define VCN_DEC_SW_CMD_TRAP 0x00000004 +#define VCN_DEC_SW_CMD_IB_AUTO 0x00000005 +#define VCN_DEC_SW_CMD_SEMAPHORE 0x00000006 +#define VCN_DEC_SW_CMD_PREEMPT_FENCE 0x00000009 +#define VCN_DEC_SW_CMD_REG_WRITE 0x0000000b +#define VCN_DEC_SW_CMD_REG_WAIT 0x0000000c + #define VCN_ENC_CMD_NO_OP 0x00000000 #define VCN_ENC_CMD_END 0x00000001 #define VCN_ENC_CMD_IB 0x00000002 @@ -145,6 +156,10 @@ } while (0) #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) +#define AMDGPU_VCN_SW_RING_FLAG (1 << 9) + +#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 +#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 enum fw_queue_mode { FW_QUEUE_RING_RESET = 1, @@ -236,12 +251,25 @@ struct amdgpu_fw_shared_multi_queue { uint8_t padding[4]; }; +struct amdgpu_fw_shared_sw_ring { + uint8_t is_enabled; + uint8_t padding[3]; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[53]; struct amdgpu_fw_shared_multi_queue multi_queue; + struct amdgpu_fw_shared_sw_ring sw_ring; } __attribute__((__packed__)); +struct amdgpu_vcn_decode_buffer { + uint32_t valid_buf_flag; + uint32_t msg_buffer_address_hi; + uint32_t msg_buffer_address_lo; + uint32_t pad[30]; +}; + int amdgpu_vcn_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); int amdgpu_vcn_suspend(struct amdgpu_device *adev); @@ -251,6 +279,8 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring); +int 
amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c index 7f7097931c6f..f9d3d79f68b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vf_error.c @@ -59,7 +59,7 @@ void amdgpu_vf_error_trans_all(struct amdgpu_device *adev) return; } /* - TODO: Enable these code when pv2vf_info is merged + TODO: Enable these code when pv2vf_info is merged AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags, &pf2vf_flags); if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT)) { return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index d0aea5e39531..2d51b7694d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -47,11 +47,13 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) void amdgpu_virt_init_setting(struct amdgpu_device *adev) { + struct drm_device *ddev = adev_to_drm(adev); + /* enable virtual display */ if (adev->mode_info.num_crtc == 0) adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; - adev_to_drm(adev)->driver->driver_features &= ~DRIVER_ATOMIC; + ddev->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; } @@ -104,7 +106,7 @@ failed_kiq: /** * amdgpu_virt_request_full_gpu() - request full gpu access - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * @init: is driver init time. * When start to init/fini driver, first need to request full gpu access. * Return: Zero if request success, otherwise will return error. @@ -127,7 +129,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init) /** * amdgpu_virt_release_full_gpu() - release full gpu access - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * @init: is driver init time. * When finishing driver init/fini, need to release full gpu access. * Return: Zero if release success, otherwise will returen error. @@ -149,7 +151,7 @@ int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init) /** * amdgpu_virt_reset_gpu() - reset gpu - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * Send reset command to GPU hypervisor to reset GPU that VM is using * Return: Zero if reset success, otherwise will return error. */ @@ -184,7 +186,7 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev) /** * amdgpu_virt_wait_reset() - wait for reset gpu completed - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * Wait for GPU reset completed. * Return: Zero if reset success, otherwise will return error. */ @@ -200,7 +202,7 @@ int amdgpu_virt_wait_reset(struct amdgpu_device *adev) /** * amdgpu_virt_alloc_mm_table() - alloc memory for mm table - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * MM table is used by UVD and VCE for its initialization * Return: Zero if allocate success. */ @@ -230,7 +232,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev) /** * amdgpu_virt_free_mm_table() - free mm table memory - * @amdgpu: amdgpu device. + * @adev: amdgpu device. 
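Editor's sketch (not part of the patch): the VCN begin_use/idle hunks earlier in this patch pair gfxoff control with the VIDEO power profile. The profile is requested only when cancel_delayed_work_sync() reports the idle work was not pending (meaning the worker already ran and dropped it), and released again once the idle worker finds no outstanding fences. A compressed, illustrative version of that pairing, with the error handling of the real code omitted:

static void example_vcn_begin_use(struct amdgpu_device *adev)
{
	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		/* the idle worker already dropped gfxoff and the VIDEO
		 * profile, so take them again for this burst of work */
		amdgpu_gfx_off_ctrl(adev, false);
		amdgpu_dpm_switch_power_profile(adev,
						PP_SMC_POWER_PROFILE_VIDEO,
						true);
	}
}

static void example_vcn_gone_idle(struct amdgpu_device *adev)
{
	/* no fences and no pending submissions: allow gfxoff again and
	 * leave the VIDEO power profile */
	amdgpu_gfx_off_ctrl(adev, true);
	amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
					false);
}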
* Free MM table memory */ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) @@ -280,8 +282,8 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) if (!*data) return -ENOMEM; - bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL); - bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL); + bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); + bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); if (!bps || !bps_bo) { kfree(bps); @@ -555,7 +557,7 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) return 0; } -void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) +static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index df110afa97bf..0768c8686983 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -300,7 +300,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) { spin_lock(&vm_bo->vm->invalidated_lock); - list_del_init(&vm_bo->vm_status); + list_move(&vm_bo->vm_status, &vm_bo->vm->done); spin_unlock(&vm_bo->vm->invalidated_lock); } @@ -609,7 +609,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) if (!amdgpu_bo_is_amdgpu_bo(bo)) return; - if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) + if (bo->pin_count) return; abo = ttm_to_amdgpu_bo(bo); @@ -1570,7 +1570,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, /** * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * - * @adev: amdgpu_device pointer + * @adev: amdgpu_device pointer of the VM + * @bo_adev: amdgpu_device pointer of the mapped BO * @vm: requested vm * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback @@ -1578,7 +1579,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries - * @addr: addr to set the area to + * @offset: offset into nodes and pages_addr + * @nodes: array of drm_mm_nodes with the MC addresses * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence * @@ -1588,15 +1590,18 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * 0 for success, -EINVAL for failure. 
*/ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev, struct amdgpu_vm *vm, bool immediate, bool unlocked, struct dma_resv *resv, uint64_t start, uint64_t last, - uint64_t flags, uint64_t addr, + uint64_t flags, uint64_t offset, + struct drm_mm_node *nodes, dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_update_params params; enum amdgpu_sync_mode sync_mode; + uint64_t pfn; int r; memset(¶ms, 0, sizeof(params)); @@ -1614,6 +1619,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, else sync_mode = AMDGPU_SYNC_EXPLICIT; + pfn = offset >> PAGE_SHIFT; + if (nodes) { + while (pfn >= nodes->size) { + pfn -= nodes->size; + ++nodes; + } + } + amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; @@ -1632,105 +1645,47 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_unlock; - r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); - if (r) - goto error_unlock; - - r = vm->update_funcs->commit(¶ms, fence); - -error_unlock: - amdgpu_vm_eviction_unlock(vm); - return r; -} - -/** - * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks - * - * @adev: amdgpu_device pointer - * @resv: fences we need to sync to - * @pages_addr: DMA addresses to use for mapping - * @vm: requested vm - * @mapping: mapped range and flags to use for the update - * @flags: HW flags for the mapping - * @bo_adev: amdgpu_device pointer that bo actually been allocated - * @nodes: array of drm_mm_nodes with the MC addresses - * @fence: optional resulting fence - * - * Split the mapping into smaller chunks so that each update fits - * into a SDMA IB. - * - * Returns: - * 0 for success, -EINVAL for failure. - */ -static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, - struct dma_resv *resv, - dma_addr_t *pages_addr, - struct amdgpu_vm *vm, - struct amdgpu_bo_va_mapping *mapping, - uint64_t flags, - struct amdgpu_device *bo_adev, - struct drm_mm_node *nodes, - struct dma_fence **fence) -{ - unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size; - uint64_t pfn, start = mapping->start; - int r; - - /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here - * but in case of something, we filter the flags in first place - */ - if (!(mapping->flags & AMDGPU_PTE_READABLE)) - flags &= ~AMDGPU_PTE_READABLE; - if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) - flags &= ~AMDGPU_PTE_WRITEABLE; - - /* Apply ASIC specific mapping flags */ - amdgpu_gmc_get_vm_pte(adev, mapping, &flags); - - trace_amdgpu_vm_bo_update(mapping); - - pfn = mapping->offset >> PAGE_SHIFT; - if (nodes) { - while (pfn >= nodes->size) { - pfn -= nodes->size; - ++nodes; - } - } - do { - dma_addr_t *dma_addr = NULL; - uint64_t max_entries; - uint64_t addr, last; + uint64_t tmp, num_entries, addr; + - max_entries = mapping->last - start + 1; + num_entries = last - start + 1; if (nodes) { addr = nodes->start << PAGE_SHIFT; - max_entries = min((nodes->size - pfn) * - AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries); + num_entries = min((nodes->size - pfn) * + AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries); } else { addr = 0; } if (pages_addr) { - uint64_t count; + bool contiguous = true; - for (count = 1; - count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; - ++count) { - uint64_t idx = pfn + count; + if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { + uint64_t count; - if (pages_addr[idx] != - (pages_addr[idx - 1] + PAGE_SIZE)) - break; + contiguous = pages_addr[pfn + 1] == + pages_addr[pfn] + 
PAGE_SIZE; + + tmp = num_entries / + AMDGPU_GPU_PAGES_IN_CPU_PAGE; + for (count = 2; count < tmp; ++count) { + uint64_t idx = pfn + count; + + if (contiguous != (pages_addr[idx] == + pages_addr[idx - 1] + PAGE_SIZE)) + break; + } + num_entries = count * + AMDGPU_GPU_PAGES_IN_CPU_PAGE; } - if (count < min_linear_pages) { + if (!contiguous) { addr = pfn << PAGE_SHIFT; - dma_addr = pages_addr; + params.pages_addr = pages_addr; } else { addr = pages_addr[pfn]; - max_entries = count * - AMDGPU_GPU_PAGES_IN_CPU_PAGE; + params.pages_addr = NULL; } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { @@ -1738,23 +1693,25 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, addr += pfn << PAGE_SHIFT; } - last = start + max_entries - 1; - r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, - start, last, flags, addr, - dma_addr, fence); + tmp = start + num_entries; + r = amdgpu_vm_update_ptes(¶ms, start, tmp, addr, flags); if (r) - return r; + goto error_unlock; - pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE; + pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; if (nodes && nodes->size == pfn) { pfn = 0; ++nodes; } - start = last + 1; + start = tmp; - } while (unlikely(start != mapping->last + 1)); + } while (unlikely(start != last + 1)); - return 0; + r = vm->update_funcs->commit(¶ms, fence); + +error_unlock: + amdgpu_vm_eviction_unlock(vm); + return r; } /** @@ -1790,7 +1747,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, resv = vm->root.base.bo->tbo.base.resv; } else { struct drm_gem_object *obj = &bo->tbo.base; - struct ttm_dma_tt *ttm; resv = bo->tbo.base.resv; if (obj->import_attach && bo_va->is_xgmi) { @@ -1803,10 +1759,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } mem = &bo->tbo.mem; nodes = mem->mm_node; - if (mem->mem_type == TTM_PL_TT) { - ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); - pages_addr = ttm->dma_address; - } + if (mem->mem_type == TTM_PL_TT) + pages_addr = bo->tbo.ttm->dma_address; } if (bo) { @@ -1835,9 +1789,26 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm, - mapping, flags, bo_adev, nodes, - last_update); + uint64_t update_flags = flags; + + /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here + * but in case of something, we filter the flags in first place + */ + if (!(mapping->flags & AMDGPU_PTE_READABLE)) + update_flags &= ~AMDGPU_PTE_READABLE; + if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) + update_flags &= ~AMDGPU_PTE_WRITEABLE; + + /* Apply ASIC specific mapping flags */ + amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags); + + trace_amdgpu_vm_bo_update(mapping); + + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, + resv, mapping->start, + mapping->last, update_flags, + mapping->offset, nodes, + pages_addr, last_update); if (r) return r; } @@ -2045,9 +2016,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, - mapping->start, mapping->last, - init_pte_value, 0, NULL, &f); + r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, + resv, mapping->start, + mapping->last, init_pte_value, + 0, NULL, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2166,7 
+2138,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, /** - * amdgpu_vm_bo_insert_mapping - insert a new mapping + * amdgpu_vm_bo_insert_map - insert a new mapping * * @adev: amdgpu_device pointer * @bo_va: bo_va to store the address @@ -2823,7 +2795,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_LIST_HEAD(&vm->invalidated); spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); - + INIT_LIST_HEAD(&vm->done); /* create scheduler entities for page table updates */ r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, @@ -3375,8 +3347,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } - r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr, - addr + 1, flags, value, NULL, NULL); + r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, + addr, flags, value, NULL, NULL, + NULL); if (r) goto error_unlock; @@ -3392,3 +3365,99 @@ error_unref: return false; } + +#if defined(CONFIG_DEBUG_FS) +/** + * amdgpu_debugfs_vm_bo_info - print BO info for the VM + * + * @vm: Requested VM for printing BO info + * @m: debugfs file + * + * Print BO information in debugfs file for the VM + */ +void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) +{ + struct amdgpu_bo_va *bo_va, *tmp; + u64 total_idle = 0; + u64 total_evicted = 0; + u64 total_relocated = 0; + u64 total_moved = 0; + u64 total_invalidated = 0; + u64 total_done = 0; + unsigned int total_idle_objs = 0; + unsigned int total_evicted_objs = 0; + unsigned int total_relocated_objs = 0; + unsigned int total_moved_objs = 0; + unsigned int total_invalidated_objs = 0; + unsigned int total_done_objs = 0; + unsigned int id = 0; + + seq_puts(m, "\tIdle BOs:\n"); + list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) { + if (!bo_va->base.bo) + continue; + total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m); + } + total_idle_objs = id; + id = 0; + + seq_puts(m, "\tEvicted BOs:\n"); + list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) { + if (!bo_va->base.bo) + continue; + total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m); + } + total_evicted_objs = id; + id = 0; + + seq_puts(m, "\tRelocated BOs:\n"); + list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) { + if (!bo_va->base.bo) + continue; + total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m); + } + total_relocated_objs = id; + id = 0; + + seq_puts(m, "\tMoved BOs:\n"); + list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { + if (!bo_va->base.bo) + continue; + total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m); + } + total_moved_objs = id; + id = 0; + + seq_puts(m, "\tInvalidated BOs:\n"); + spin_lock(&vm->invalidated_lock); + list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) { + if (!bo_va->base.bo) + continue; + total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m); + } + total_invalidated_objs = id; + id = 0; + + seq_puts(m, "\tDone BOs:\n"); + list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) { + if (!bo_va->base.bo) + continue; + total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m); + } + spin_unlock(&vm->invalidated_lock); + total_done_objs = id; + + seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle, + total_idle_objs); + seq_printf(m, "\tTotal evicted size: %12lld\tobjs:\t%d\n", total_evicted, + total_evicted_objs); + seq_printf(m, "\tTotal relocated size: 
%12lld\tobjs:\t%d\n", total_relocated, + total_relocated_objs); + seq_printf(m, "\tTotal moved size: %12lld\tobjs:\t%d\n", total_moved, + total_moved_objs); + seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated, + total_invalidated_objs); + seq_printf(m, "\tTotal done size: %12lld\tobjs:\t%d\n", total_done, + total_done_objs); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 58c83a7ad0fd..976a12e5a8b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -76,6 +76,9 @@ struct amdgpu_bo_list_entry; /* PTE is handled as PDE for VEGA10 (Translate Further) */ #define AMDGPU_PTE_TF (1ULL << 56) +/* MALL noalloc for sienna_cichlid, reserved for older ASICs */ +#define AMDGPU_PTE_NOALLOC (1ULL << 58) + /* PDE Block Fragment Size for VEGA10 */ #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) @@ -104,7 +107,7 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 /* Reserve 4MB VRAM for page tables */ -#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20) +#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) /* max number of VMHUB */ #define AMDGPU_MAX_VMHUBS 3 @@ -274,6 +277,9 @@ struct amdgpu_vm { /* BO mappings freed, but not yet updated in the PT */ struct list_head freed; + /* BOs which are invalidated, has been updated in the PTs */ + struct list_head done; + /* contains the page directory */ struct amdgpu_vm_pt root; struct dma_fence *last_update; @@ -441,4 +447,8 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); +#if defined(CONFIG_DEBUG_FS) +void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); +#endif + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 0786e7555554..ac45d9c7a4e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -38,8 +38,8 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table) * amdgpu_vm_cpu_prepare - prepare page table update with the CPU * * @p: see amdgpu_vm_update_params definition - * @owner: owner we need to sync to - * @exclusive: exclusive move fence we need to sync to + * @resv: reservation object with embedded fence + * @sync_mode: synchronization mode * * Returns: * Negativ errno, 0 for success. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index db790574dc2e..a83a646759c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -51,8 +51,8 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table) * amdgpu_vm_sdma_prepare - prepare SDMA command submission * * @p: see amdgpu_vm_update_params definition - * @owner: owner we need to sync to - * @exclusive: exclusive move fence we need to sync to + * @resv: reservation object with embedded fence + * @sync_mode: synchronization mode * * Returns: * Negativ errno, 0 for success. 
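The amdgpu_vm.c rework above folds the old amdgpu_vm_bo_split_mapping() loop into amdgpu_vm_bo_update_mapping() itself: it walks the drm_mm nodes by pfn and, for system pages, scans pages_addr[] to decide whether a run can be mapped from a single base address (params.pages_addr = NULL) or must keep per-page DMA addresses. A minimal standalone sketch of that scan follows; the function name, the plain uint64_t standing in for dma_addr_t, and the local page-size define are illustrative only, not driver symbols.

#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ULL

/*
 * Count how many entries of a DMA-address array, starting at pfn, form one
 * physically contiguous run.  In the reworked update path a run like this
 * can be mapped from a single base address instead of handing the whole
 * pages_addr array to the PTE update.
 */
static uint64_t count_contiguous_pages(const uint64_t *pages_addr,
				       uint64_t pfn, uint64_t num_entries)
{
	uint64_t count;

	for (count = 1; count < num_entries; ++count) {
		uint64_t idx = pfn + count;

		if (pages_addr[idx] != pages_addr[idx - 1] + SKETCH_PAGE_SIZE)
			break;
	}
	return count;
}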
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 0c6b7c5ecfec..d2de2a720a3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -184,6 +184,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) drm_mm_init(&mgr->mm, 0, man->size); spin_lock_init(&mgr->lock); + INIT_LIST_HEAD(&mgr->reservations_pending); + INIT_LIST_HEAD(&mgr->reserved_pages); /* Add the two VRAM-related sysfs files */ ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); @@ -208,14 +210,22 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; int ret; + struct amdgpu_vram_reservation *rsv, *temp; ttm_resource_manager_set_used(man, false); - ret = ttm_resource_manager_force_list_clean(&adev->mman.bdev, man); + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); if (ret) return; spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + kfree(rsv); + + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); + kfree(rsv); + } drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); @@ -274,6 +284,101 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) return usage; } +static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) +{ + struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct drm_mm *mm = &mgr->mm; + struct amdgpu_vram_reservation *rsv, *temp; + uint64_t vis_usage; + + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { + if (drm_mm_reserve_node(mm, &rsv->mm_node)) + continue; + + dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", + rsv->mm_node.start, rsv->mm_node.size); + + vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); + atomic64_add(vis_usage, &mgr->vis_usage); + atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage); + list_move(&rsv->node, &mgr->reserved_pages); + } +} + +/** + * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM + * + * @man: TTM memory type manager + * @start: start address of the range in VRAM + * @size: size of the range + * + * Reserve memory from start addess with the specified size in VRAM + */ +int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man, + uint64_t start, uint64_t size) +{ + struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct amdgpu_vram_reservation *rsv; + + rsv = kzalloc(sizeof(*rsv), GFP_KERNEL); + if (!rsv) + return -ENOMEM; + + INIT_LIST_HEAD(&rsv->node); + rsv->mm_node.start = start >> PAGE_SHIFT; + rsv->mm_node.size = size >> PAGE_SHIFT; + + spin_lock(&mgr->lock); + list_add_tail(&mgr->reservations_pending, &rsv->node); + amdgpu_vram_mgr_do_reserve(man); + spin_unlock(&mgr->lock); + + return 0; +} + +/** + * amdgpu_vram_mgr_query_page_status - query the reservation status + * + * @man: TTM memory type manager + * @start: start address of a page in VRAM + * + * Returns: + * -EBUSY: the page is still hold and in pending list + * 0: the page has been reserved + * -ENOENT: the input page is not a reservation + */ +int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, + uint64_t start) +{ + struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct amdgpu_vram_reservation *rsv; + int ret; + + spin_lock(&mgr->lock); + + list_for_each_entry(rsv, &mgr->reservations_pending, node) { 
+ if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { + ret = -EBUSY; + goto out; + } + } + + list_for_each_entry(rsv, &mgr->reserved_pages, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { + ret = 0; + goto out; + } + } + + ret = -ENOENT; +out: + spin_unlock(&mgr->lock); + return ret; +} + /** * amdgpu_vram_mgr_virt_start - update virtual start address * @@ -444,6 +549,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes); ++nodes; } + amdgpu_vram_mgr_do_reserve(man); spin_unlock(&mgr->lock); atomic64_sub(usage, &mgr->usage); @@ -528,9 +634,11 @@ error_free: } /** - * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table + * amdgpu_vram_mgr_free_sgt - allocate and fill a sg table * * @adev: amdgpu device pointer + * @dev: device pointer + * @dir: data direction of resource to unmap * @sgt: sg table to free * * Free a previously allocate sg table. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1162913c8bf4..541ef6be390f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -246,7 +246,7 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); - return snprintf(buf, PAGE_SIZE, "%d\n", error_count); + return snprintf(buf, PAGE_SIZE, "%u\n", error_count); } @@ -395,12 +395,17 @@ void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive) int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); - struct amdgpu_device *request_adev = hive->hi_req_gpu ? - hive->hi_req_gpu : adev; + struct amdgpu_hive_info *hive; + struct amdgpu_device *request_adev; bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20; - bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; + bool init_low; + + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + return 0; + request_adev = hive->hi_req_gpu ? hive->hi_req_gpu : adev; + init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; amdgpu_put_xgmi_hive(hive); /* fw bug so temporarily disable pstate switching */ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index 939eca63b094..66c183ddd43e 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -74,6 +74,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); athub_v2_1_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 4cfc786699c7..515890f4f5a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -66,13 +66,13 @@ typedef struct { bool abort; } atom_exec_context; -int amdgpu_atom_debug = 0; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t * params); +int amdgpu_atom_debug; +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); static uint32_t atom_arg_mask[8] = - { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000, -0xFF000000 }; + { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000, + 0xFF000000 }; static int atom_arg_shift[8] = { 0, 0, 8, 16, 0, 8, 16, 24 }; static int atom_dst_to_src[8][4] = { @@ -88,7 +88,7 @@ static int atom_dst_to_src[8][4] = { }; static int atom_def_dst[8] = { 0, 0, 1, 2, 0, 1, 2, 3 }; -static int debug_depth = 0; +static int debug_depth; #ifdef ATOM_DEBUG static void debug_print_spaces(int n) { @@ -1201,7 +1201,7 @@ static struct { atom_op_div32, ATOM_ARG_WS}, }; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params) +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params) { int base = CU16(ctx->cmd_table + 4 + 2 * index); int len, ws, ps, ptr; @@ -1262,7 +1262,7 @@ free: return ret; } -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t * params) +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params) { int r; @@ -1388,8 +1388,8 @@ void amdgpu_atom_destroy(struct atom_context *ctx) } bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, - uint16_t * size, uint8_t * frev, uint8_t * crev, - uint16_t * data_start) + uint16_t *size, uint8_t *frev, uint8_t *crev, + uint16_t *data_start) { int offset = index * 2 + 4; int idx = CU16(ctx->data_table + offset); @@ -1408,8 +1408,8 @@ bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, return true; } -bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t * frev, - uint8_t * crev) +bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, uint8_t *frev, + uint8_t *crev) { int offset = index * 2 + 4; int idx = CU16(ctx->cmd_table + offset); diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 8339c8c3a328..6134ed964027 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -171,7 +171,6 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode struct backlight_properties props; struct amdgpu_backlight_privdata *pdata; struct amdgpu_encoder_atom_dig *dig; - u8 backlight_level; char bl_name[16]; /* Mac laptops with multiple GPUs use the gmux driver for backlight @@ -207,8 +206,6 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode pdata->encoder = amdgpu_encoder; - backlight_level = amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - dig = amdgpu_encoder->enc_priv; dig->bl_dev = bd; @@ -499,10 +496,8 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder 
*encoder) } else { return ATOM_ENCODER_MODE_DVI; } - break; case DRM_MODE_CONNECTOR_LVDS: return ATOM_ENCODER_MODE_LVDS; - break; case DRM_MODE_CONNECTOR_DisplayPort: dig_connector = amdgpu_connector->con_priv; if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) || @@ -519,20 +514,16 @@ int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) } else { return ATOM_ENCODER_MODE_DVI; } - break; case DRM_MODE_CONNECTOR_eDP: return ATOM_ENCODER_MODE_DP; case DRM_MODE_CONNECTOR_DVIA: case DRM_MODE_CONNECTOR_VGA: return ATOM_ENCODER_MODE_CRT; - break; case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_9PinDIN: /* fix me */ return ATOM_ENCODER_MODE_TV; - /*return ATOM_ENCODER_MODE_CV;*/ - break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index 09a538465ffd..af0335535f82 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -159,7 +159,7 @@ u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } -void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, u8 slave_addr, u8 line_number, u8 offset, u8 data) +void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr, u8 line_number, u8 offset, u8 data) { PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args; int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 5442df094102..13737b317f7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1336,10 +1336,6 @@ cik_asic_reset_method(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_BONAIRE: - /* disable baco reset until it works */ - /* smu7_asic_get_baco_capability(adev, &baco_reset); */ - baco_reset = false; - break; case CHIP_HAWAII: baco_reset = cik_asic_supports_baco(adev); break; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index db953e95f3d2..d3745711d55f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -177,6 +177,7 @@ static void cik_ih_irq_disable(struct amdgpu_device *adev) * cik_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (CIK). Also check for @@ -266,6 +267,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, * cik_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set wptr * * Set the IH ring buffer rptr. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index a3c3fe96515f..43b978144b79 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], - (lower_32_bits(ring->wptr) << 2) & 0x3fffc); + (lower_32_bits(ring->wptr) << 2) & 0x3fffc); } static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -215,7 +215,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrive vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (CIK). */ @@ -267,7 +269,9 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring) * cik_sdma_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -655,6 +659,7 @@ error_free_wb: * cik_sdma_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (CIK). * Returns 0 on success, error on failure. @@ -801,6 +806,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, /** * cik_sdma_vm_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * */ @@ -849,7 +855,8 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (CIK). @@ -1298,10 +1305,11 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) /** * cik_sdma_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: is this a secure operation * * Copy GPU buffers using the DMA engine (CIK). * Used by the amdgpu ttm implementation to move pages if @@ -1325,7 +1333,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, /** * cik_sdma_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 1dca0cabc326..da37f8a900af 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -179,6 +179,7 @@ static void cz_ih_irq_disable(struct amdgpu_device *adev) * cz_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VI). 
Also check for @@ -213,6 +214,8 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, * cz_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -245,6 +248,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, * cz_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5963cbe0d455..7944781e1086 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -228,6 +228,7 @@ static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Triggers the actual pageflip by updating the primary * surface base address. @@ -2202,22 +2203,18 @@ static int dce_v10_0_pick_dig_encoder(struct drm_encoder *encoder) return 1; else return 0; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: if (dig->linkb) return 3; else return 2; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: if (dig->linkb) return 5; else return 4; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: return 6; - break; default: DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); return 0; @@ -2677,7 +2674,7 @@ static int dce_v10_0_crtc_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, enum mode_set_atomic state) { - return dce_v10_0_crtc_do_set_base(crtc, fb, x, y, 1); + return dce_v10_0_crtc_do_set_base(crtc, fb, x, y, 1); } static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 1954472c8e8f..1b6ff0470011 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -246,6 +246,7 @@ static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Triggers the actual pageflip by updating the primary * surface base address. 
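The amdgpu_vram_mgr.c hunks further up add a reservation path: amdgpu_vram_mgr_reserve_range() queues a request on reservations_pending, amdgpu_vram_mgr_do_reserve() retries it under mgr->lock (also from amdgpu_vram_mgr_del() once space frees up) and moves it to reserved_pages, and amdgpu_vram_mgr_query_page_status() reports -EBUSY while pending, 0 once the range is carved out of the drm_mm, or -ENOENT for an address no reservation covers. A caller sketch under those assumptions, kernel context assumed and the wrapper name purely illustrative:

/* Illustrative only; assumes <linux/errno.h> and the amdgpu VRAM manager API above. */
static int sketch_reserve_bad_page(struct ttm_resource_manager *man,
				   uint64_t byte_addr)
{
	int r;

	/* Byte-based start and size; the manager stores them in page units. */
	r = amdgpu_vram_mgr_reserve_range(man, byte_addr, PAGE_SIZE);
	if (r)
		return r;	/* -ENOMEM if the tracking struct could not be allocated */

	/*
	 * The status helper compares against the page-based mm_node set up
	 * by reserve_range, so query with the page index, not the byte address.
	 */
	r = amdgpu_vram_mgr_query_page_status(man, byte_addr >> PAGE_SHIFT);
	return r == -EBUSY ? 0 : r;	/* still pending is fine; it is applied later */
}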
@@ -2235,22 +2236,18 @@ static int dce_v11_0_pick_dig_encoder(struct drm_encoder *encoder) return 1; else return 0; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: if (dig->linkb) return 3; else return 2; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: if (dig->linkb) return 5; else return 4; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: return 6; - break; default: DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); return 0; @@ -2304,19 +2301,16 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) return ATOM_COMBOPHY_PLL1; else return ATOM_COMBOPHY_PLL0; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: if (dig->linkb) return ATOM_COMBOPHY_PLL3; else return ATOM_COMBOPHY_PLL2; - break; case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: if (dig->linkb) return ATOM_COMBOPHY_PLL5; else return ATOM_COMBOPHY_PLL4; - break; default: DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); return ATOM_PPLL_INVALID; @@ -2785,7 +2779,7 @@ static int dce_v11_0_crtc_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, enum mode_set_atomic state) { - return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1); + return dce_v11_0_crtc_do_set_base(crtc, fb, x, y, 1); } static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3a44753a80d1..83a88385b762 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -180,6 +180,7 @@ static void dce_v6_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Does the actual pageflip (evergreen+). * During vblank we take the crtc lock and wait for the update_pending @@ -1047,7 +1048,6 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev, /** - * * dce_v6_0_bandwidth_update - program display watermarks * * @adev: amdgpu_device pointer @@ -2567,7 +2567,7 @@ static int dce_v6_0_crtc_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, enum mode_set_atomic state) { - return dce_v6_0_crtc_do_set_base(crtc, fb, x, y, 1); + return dce_v6_0_crtc_do_set_base(crtc, fb, x, y, 1); } static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 3603e5f13077..224b30214427 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -176,6 +176,7 @@ static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Triggers the actual pageflip by updating the primary * surface base address. 
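Several of the atombios_encoders and dce_v10_0/dce_v11_0 hunks above are the same mechanical cleanup: a break placed directly after a return is unreachable (checkpatch warns about break after return/goto), so the cases now return straight out of the switch. In miniature, with the connector and encoder-mode constants taken from the hunks above and kernel headers assumed:

/* Illustrative only: the commented-out break is the dead statement being dropped. */
static int sketch_encoder_mode(int connector_type)
{
	switch (connector_type) {
	case DRM_MODE_CONNECTOR_LVDS:
		return ATOM_ENCODER_MODE_LVDS;
		/* break;  <- unreachable after return, removed by the cleanup */
	default:
		return ATOM_ENCODER_MODE_DVI;
	}
}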
@@ -2498,7 +2499,7 @@ static void dce_v8_0_crtc_disable(struct drm_crtc *crtc) case ATOM_PPLL2: /* disable the ppll */ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id, - 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); + 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); break; case ATOM_PPLL0: /* disable the ppll */ @@ -2585,7 +2586,7 @@ static int dce_v8_0_crtc_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, int x, int y, enum mode_set_atomic state) { - return dce_v8_0_crtc_do_set_base(crtc, fb, x, y, 1); + return dce_v8_0_crtc_do_set_base(crtc, fb, x, y, 1); } static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index b4d4b76538d2..ffcc64ec6473 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -139,9 +139,6 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); unsigned type; - if (amdgpu_sriov_vf(adev)) - return; - switch (mode) { case DRM_MODE_DPMS_ON: amdgpu_crtc->enabled = true; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index d6aca1c08068..2d01ac0d4c11 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -41,7 +41,7 @@ static void df_v1_7_sw_fini(struct amdgpu_device *adev) } static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev, - bool enable) + bool enable) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 7b89fd2aa44a..6b4b30a8dce5 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -30,71 +30,17 @@ #define DF_3_6_SMN_REG_INST_DIST 0x8 #define DF_3_6_INST_CNT 8 -static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, - 16, 32, 0, 0, 0, 2, 4, 8}; - -/* init df format attrs */ -AMDGPU_PMU_ATTR(event, "config:0-7"); -AMDGPU_PMU_ATTR(instance, "config:8-15"); -AMDGPU_PMU_ATTR(umask, "config:16-23"); - -/* df format attributes */ -static struct attribute *df_v3_6_format_attrs[] = { - &pmu_attr_event.attr, - &pmu_attr_instance.attr, - &pmu_attr_umask.attr, - NULL -}; - -/* df format attribute group */ -static struct attribute_group df_v3_6_format_attr_group = { - .name = "format", - .attrs = df_v3_6_format_attrs, -}; - -/* df event attrs */ -AMDGPU_PMU_ATTR(cake0_pcsout_txdata, - "event=0x7,instance=0x46,umask=0x2"); -AMDGPU_PMU_ATTR(cake1_pcsout_txdata, - "event=0x7,instance=0x47,umask=0x2"); -AMDGPU_PMU_ATTR(cake0_pcsout_txmeta, - "event=0x7,instance=0x46,umask=0x4"); -AMDGPU_PMU_ATTR(cake1_pcsout_txmeta, - "event=0x7,instance=0x47,umask=0x4"); -AMDGPU_PMU_ATTR(cake0_ftiinstat_reqalloc, - "event=0xb,instance=0x46,umask=0x4"); -AMDGPU_PMU_ATTR(cake1_ftiinstat_reqalloc, - "event=0xb,instance=0x47,umask=0x4"); -AMDGPU_PMU_ATTR(cake0_ftiinstat_rspalloc, - "event=0xb,instance=0x46,umask=0x8"); -AMDGPU_PMU_ATTR(cake1_ftiinstat_rspalloc, - "event=0xb,instance=0x47,umask=0x8"); - -/* df event attributes */ -static struct attribute *df_v3_6_event_attrs[] = { - &pmu_attr_cake0_pcsout_txdata.attr, - &pmu_attr_cake1_pcsout_txdata.attr, - &pmu_attr_cake0_pcsout_txmeta.attr, - &pmu_attr_cake1_pcsout_txmeta.attr, - &pmu_attr_cake0_ftiinstat_reqalloc.attr, - &pmu_attr_cake1_ftiinstat_reqalloc.attr, - &pmu_attr_cake0_ftiinstat_rspalloc.attr, - &pmu_attr_cake1_ftiinstat_rspalloc.attr, - NULL -}; +/* Defined in 
global_features.h as FTI_PERFMON_VISIBLE */ +#define DF_V3_6_MAX_COUNTERS 4 -/* df event attribute group */ -static struct attribute_group df_v3_6_event_attr_group = { - .name = "events", - .attrs = df_v3_6_event_attrs -}; +/* get flags from df perfmon config */ +#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL) +#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL) +#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL) +#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL -/* df event attr groups */ -const struct attribute_group *df_v3_6_attr_groups[] = { - &df_v3_6_format_attr_group, - &df_v3_6_event_attr_group, - NULL -}; +static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0, + 16, 32, 0, 0, 0, 2, 4, 8}; static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, uint32_t ficaa_val) @@ -391,33 +337,28 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, } /* get assigned df perfmon ctr as int */ -static int df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev, - uint64_t config) +static bool df_v3_6_pmc_has_counter(struct amdgpu_device *adev, + uint64_t config, + int counter_idx) { - int i; - for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { - if ((config & 0x0FFFFFFUL) == - adev->df_perfmon_config_assign_mask[i]) - return i; - } + return ((config & 0x0FFFFFFUL) == + adev->df_perfmon_config_assign_mask[counter_idx]); - return -EINVAL; } /* get address based on counter assignment */ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, uint64_t config, + int counter_idx, int is_ctrl, uint32_t *lo_base_addr, uint32_t *hi_base_addr) { - int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - - if (target_cntr < 0) + if (!df_v3_6_pmc_has_counter(adev, config, counter_idx)) return; - switch (target_cntr) { + switch (counter_idx) { case 0: *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 : smnPerfMonCtrLo4; @@ -443,15 +384,18 @@ static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev, /* get read counter address */ static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev, uint64_t config, + int counter_idx, uint32_t *lo_base_addr, uint32_t *hi_base_addr) { - df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr); + df_v3_6_pmc_get_addr(adev, config, counter_idx, 0, lo_base_addr, + hi_base_addr); } /* get control counter settings i.e. address and values to set */ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, uint64_t config, + int counter_idx, uint32_t *lo_base_addr, uint32_t *hi_base_addr, uint32_t *lo_val, @@ -462,7 +406,8 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, uint32_t eventsel, instance, unitmask; uint32_t instance_10, instance_5432, instance_76; - df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); + df_v3_6_pmc_get_addr(adev, config, counter_idx, 1, lo_base_addr, + hi_base_addr); if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { DRM_ERROR("[DF PMC] addressing not retrieved! 
Lo: %x, Hi: %x", @@ -492,18 +437,13 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev, static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, uint64_t config) { - int i, target_cntr; - - target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - - if (target_cntr >= 0) - return 0; + int i; for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { if (adev->df_perfmon_config_assign_mask[i] == 0U) { adev->df_perfmon_config_assign_mask[i] = config & 0x0FFFFFFUL; - return 0; + return i; } } @@ -512,59 +452,50 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, #define DEFERRED_ARM_MASK (1 << 31) static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev, - uint64_t config, bool is_deferred) + int counter_idx, uint64_t config, + bool is_deferred) { - int target_cntr; - target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - - if (target_cntr < 0) + if (!df_v3_6_pmc_has_counter(adev, config, counter_idx)) return -EINVAL; if (is_deferred) - adev->df_perfmon_config_assign_mask[target_cntr] |= + adev->df_perfmon_config_assign_mask[counter_idx] |= DEFERRED_ARM_MASK; else - adev->df_perfmon_config_assign_mask[target_cntr] &= + adev->df_perfmon_config_assign_mask[counter_idx] &= ~DEFERRED_ARM_MASK; return 0; } static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev, + int counter_idx, uint64_t config) { - int target_cntr; - - target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - - /* - * we never get target_cntr < 0 since this funciton is only called in - * pmc_count for now but we should check anyways. - */ - return (target_cntr >= 0 && - (adev->df_perfmon_config_assign_mask[target_cntr] - & DEFERRED_ARM_MASK)); + return (df_v3_6_pmc_has_counter(adev, config, counter_idx) && + (adev->df_perfmon_config_assign_mask[counter_idx] + & DEFERRED_ARM_MASK)); } /* release performance counter */ static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev, - uint64_t config) + uint64_t config, + int counter_idx) { - int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - - if (target_cntr >= 0) - adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL; + if (df_v3_6_pmc_has_counter(adev, config, counter_idx)) + adev->df_perfmon_config_assign_mask[counter_idx] = 0ULL; } static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, - uint64_t config) + uint64_t config, + int counter_idx) { uint32_t lo_base_addr = 0, hi_base_addr = 0; - df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, + df_v3_6_pmc_get_read_settings(adev, config, counter_idx, &lo_base_addr, &hi_base_addr); if ((lo_base_addr == 0) || (hi_base_addr == 0)) @@ -573,21 +504,22 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev, df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); } +/* return available counter if is_add == 1 otherwise return error status. 
*/ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, - int is_add) + int counter_idx, int is_add) { uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; int err = 0, ret = 0; switch (adev->asic_type) { case CHIP_VEGA20: + case CHIP_ARCTURUS: if (is_add) return df_v3_6_pmc_add_cntr(adev, config); - df_v3_6_reset_perfmon_cntr(adev, config); - ret = df_v3_6_pmc_get_ctrl_settings(adev, config, + counter_idx, &lo_base_addr, &hi_base_addr, &lo_val, @@ -604,7 +536,8 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, hi_val); if (err) - ret = df_v3_6_pmc_set_deferred(adev, config, true); + ret = df_v3_6_pmc_set_deferred(adev, config, + counter_idx, true); break; default: @@ -615,15 +548,17 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, } static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, - int is_remove) + int counter_idx, int is_remove) { uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; int ret = 0; switch (adev->asic_type) { case CHIP_VEGA20: + case CHIP_ARCTURUS: ret = df_v3_6_pmc_get_ctrl_settings(adev, config, + counter_idx, &lo_base_addr, &hi_base_addr, &lo_val, @@ -635,8 +570,8 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, if (is_remove) { - df_v3_6_reset_perfmon_cntr(adev, config); - df_v3_6_pmc_release_cntr(adev, config); + df_v3_6_reset_perfmon_cntr(adev, config, counter_idx); + df_v3_6_pmc_release_cntr(adev, config, counter_idx); } break; @@ -649,6 +584,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, uint64_t config, + int counter_idx, uint64_t *count) { uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0; @@ -656,14 +592,15 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, switch (adev->asic_type) { case CHIP_VEGA20: - df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr, - &hi_base_addr); + case CHIP_ARCTURUS: + df_v3_6_pmc_get_read_settings(adev, config, counter_idx, + &lo_base_addr, &hi_base_addr); if ((lo_base_addr == 0) || (hi_base_addr == 0)) return; /* rearm the counter or throw away count value on failure */ - if (df_v3_6_pmc_is_deferred(adev, config)) { + if (df_v3_6_pmc_is_deferred(adev, config, counter_idx)) { int rearm_err = df_v3_6_perfmon_arm_with_status(adev, lo_base_addr, lo_val, hi_base_addr, hi_val); @@ -671,7 +608,8 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, if (rearm_err) return; - df_v3_6_pmc_set_deferred(adev, config, false); + df_v3_6_pmc_set_deferred(adev, config, counter_idx, + false); } df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h index 76998541bc30..2505c7ef258a 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h @@ -35,15 +35,6 @@ enum DF_V3_6_MGCG { DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15 }; -/* Defined in global_features.h as FTI_PERFMON_VISIBLE */ -#define DF_V3_6_MAX_COUNTERS 4 - -/* get flags from df perfmon config */ -#define DF_V3_6_GET_EVENT(x) (x & 0xFFUL) -#define DF_V3_6_GET_INSTANCE(x) ((x >> 8) & 0xFFUL) -#define DF_V3_6_GET_UNITMASK(x) ((x >> 16) & 0xFFUL) -#define DF_V3_6_PERFMON_OVERFLOW 0xFFFFFFFFFFFFULL - extern const struct attribute_group *df_v3_6_attr_groups[]; extern const struct amdgpu_df_funcs df_v3_6_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c 
b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c new file mode 100755 index 000000000000..e9f177e9e3cf --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/dimgrey_cavefish_reg_init.c @@ -0,0 +1,54 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "dimgrey_cavefish_ip_offset.h" + +int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialize the block needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + } + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 55f4b8c3b933..ba1086784525 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -99,6 +99,23 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 +#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 +#define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 +#define mmVGT_TF_MEMORY_BASE_HI_Vangogh_BASE_IDX 1 +#define mmVGT_HS_OFFCHIP_PARAM_Vangogh 
0x224f +#define mmVGT_HS_OFFCHIP_PARAM_Vangogh_BASE_IDX 1 +#define mmVGT_TF_RING_SIZE_Vangogh 0x224e +#define mmVGT_TF_RING_SIZE_Vangogh_BASE_IDX 1 +#define mmVGT_GSVS_RING_SIZE_Vangogh 0x2241 +#define mmVGT_GSVS_RING_SIZE_Vangogh_BASE_IDX 1 +#define mmVGT_TF_MEMORY_BASE_Vangogh 0x2250 +#define mmVGT_TF_MEMORY_BASE_Vangogh_BASE_IDX 1 +#define mmVGT_ESGS_RING_SIZE_Vangogh 0x2240 +#define mmVGT_ESGS_RING_SIZE_Vangogh_BASE_IDX 1 +#define mmSPI_CONFIG_CNTL_Vangogh 0x2440 +#define mmSPI_CONFIG_CNTL_Vangogh_BASE_IDX 1 + #define mmCP_HYP_PFP_UCODE_ADDR 0x5814 #define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1 #define mmCP_HYP_PFP_UCODE_DATA 0x5815 @@ -112,6 +129,13 @@ #define mmCP_HYP_ME_UCODE_DATA 0x5817 #define mmCP_HYP_ME_UCODE_DATA_BASE_IDX 1 +#define mmCPG_PSP_DEBUG 0x5c10 +#define mmCPG_PSP_DEBUG_BASE_IDX 1 +#define mmCPC_PSP_DEBUG 0x5c11 +#define mmCPC_PSP_DEBUG_BASE_IDX 1 +#define CPC_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L +#define CPG_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L + //CC_GC_SA_UNIT_DISABLE #define mmCC_GC_SA_UNIT_DISABLE 0x0fe9 #define mmCC_GC_SA_UNIT_DISABLE_BASE_IDX 0 @@ -131,6 +155,11 @@ #define mmCGTT_SPI_CS_CLK_CTRL 0x507c #define mmCGTT_SPI_CS_CLK_CTRL_BASE_IDX 1 +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid 0x16f3 +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -171,6 +200,20 @@ MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_ce.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_me.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_mec.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ce.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_pfp.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_me.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -1366,23 +1409,14 @@ static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) { static void *scratch_reg0; static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; static void *spare_int; - static uint32_t grbm_cntl; - static uint32_t grbm_idx; uint32_t i = 0; uint32_t retries = 50000; scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; - scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; - scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4; spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; - grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; - grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; - if (amdgpu_sriov_runtime(adev)) { pr_err("shouldn't call rlcg 
write register during runtime\n"); return; @@ -3108,6 +3142,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3146,6 +3182,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), @@ -3154,6 +3191,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffffffff, 0xff008080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffff8fff, 0xff008080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3183,7 +3222,79 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), + + /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on Navy Flounder. 
*/ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000142), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), + + /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on VanGogh. 
*/ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000280, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07800000, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00001d00, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003c0000, 0x00280400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0x40000000, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00040000, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0x01000000, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0x0000001f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) }; #define DEFAULT_SH_MEM_CONFIG \ @@ -3198,7 +3309,7 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info); + struct amdgpu_cu_info *cu_info); static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); @@ -3395,7 +3506,16 @@ static void gfx_v10_0_init_golden_registers(struct 
amdgpu_device *adev) golden_settings_gc_10_3_2, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_2)); break; - + case CHIP_VANGOGH: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_vangogh, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh)); + break; + case CHIP_DIMGREY_CAVEFISH: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_4, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4)); + break; default: break; } @@ -3579,6 +3699,8 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.cp_fw_write_wait = true; break; default: @@ -3646,7 +3768,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; - case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; default: @@ -3691,6 +3813,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: chip_name = "navy_flounder"; break; + case CHIP_VANGOGH: + chip_name = "vangogh"; + break; + case CHIP_DIMGREY_CAVEFISH: + chip_name = "dimgrey_cavefish"; + break; default: BUG(); } @@ -4206,11 +4334,26 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) - { - nv_grbm_select(adev, me, pipe, q, vm); - } + u32 me, u32 pipe, u32 q, u32 vm) +{ + nv_grbm_select(adev, me, pipe, q, vm); +} + +static void gfx_v10_0_update_perfmon_mgcg(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + data = def = RREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL); + + if (enable) + data |= RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK; + else + data &= ~RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK; + if (data != def) + WREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL, data); +} static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter, @@ -4220,6 +4363,7 @@ static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q, .init_spm_golden = &gfx_v10_0_init_spm_golden_registers, + .update_perfmon_mgcg = &gfx_v10_0_update_perfmon_mgcg, }; static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) @@ -4241,6 +4385,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4334,7 +4480,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? 
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -4364,6 +4511,8 @@ static int gfx_v10_0_sw_init(void *handle) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4623,8 +4772,7 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade /* for ASICs that integrates GFX v10.3 * pa_sc_tile_steering_override should be set to 0 */ - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) + if (adev->asic_type >= CHIP_SIENNA_CICHLID) return 0; /* init num_sc */ @@ -4696,7 +4844,7 @@ static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); @@ -4866,7 +5014,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) return 0; } -void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); @@ -5847,20 +5995,24 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, { u32 tmp; - tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 0); + if (!amdgpu_async_gfx_ring) { + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); } - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -5994,6 +6146,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -6004,6 +6158,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -6098,6 +6254,8 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case 
CHIP_DIMGREY_CAVEFISH: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -6193,6 +6351,11 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) DOORBELL_EN, 0); mqd->cp_rb_doorbell_control = tmp; + /*if there are 2 gfx rings, set the lower doorbell range of the first ring, + *otherwise the range of the second ring will override the first ring */ + if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); @@ -6360,7 +6523,8 @@ static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -6806,6 +6970,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); @@ -6818,6 +6983,8 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) return false; } break; + case CHIP_VANGOGH: + return true; default: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); @@ -6845,6 +7012,8 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -6960,6 +7129,18 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); } +static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev) +{ + uint32_t data; + data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG); + data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data); + + data = RREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG); + data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data); +} + static int gfx_v10_0_hw_init(void *handle) { int r; @@ -6974,7 +7155,7 @@ static int gfx_v10_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. 
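The golden_settings_gc_10_3_* tables added earlier in this file (including golden_settings_gc_10_3_4 above) are lists of (register, and_mask, or_value) triples handed to soc15_program_register_sequence() by gfx_v10_0_init_golden_registers() for Vangogh and Dimgrey Cavefish. That helper's body is not part of this diff, so the following is only a sketch of the read-modify-write such a sequence typically implies; reg_read32()/reg_write32() are hypothetical stand-ins for the real register accessors.

/* Illustrative only -- not the in-tree soc15_program_register_sequence(). */
struct golden_entry {
	uint32_t offset;     /* register offset */
	uint32_t and_mask;   /* bits to clear before applying or_value */
	uint32_t or_value;   /* bits to set */
};

static void apply_golden_sequence(const struct golden_entry *regs, uint32_t count)
{
	uint32_t i, tmp;

	for (i = 0; i < count; i++) {
		if (regs[i].and_mask == 0xffffffff) {
			/* full-width mask: the entry is an absolute value */
			tmp = regs[i].or_value;
		} else {
			tmp = reg_read32(regs[i].offset);
			tmp &= ~regs[i].and_mask;
			tmp |= regs[i].or_value;
		}
		reg_write32(regs[i].offset, tmp);
	}
}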
*/ - if (adev->smu.ppt_funcs != NULL) { + if (adev->smu.ppt_funcs != NULL && !(adev->flags & AMD_IS_APU)) { r = smu_load_microcode(&adev->smu); if (r) return r; @@ -6985,6 +7166,7 @@ static int gfx_v10_0_hw_init(void *handle) return r; } } + gfx_v10_0_disable_gpa_mode(adev); } /* if GRBM CAM not remapped, set up the remapping */ @@ -7142,6 +7324,8 @@ static int gfx_v10_0_soft_reset(void *handle) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, @@ -7241,13 +7425,16 @@ static int gfx_v10_0_early_init(void *handle) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: break; } - adev->gfx.num_compute_rings = amdgpu_num_kcq; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); gfx_v10_0_set_kiq_pm4_funcs(adev); gfx_v10_0_set_ring_funcs(adev); @@ -7294,6 +7481,8 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7326,6 +7515,8 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -7350,6 +7541,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade /* 1 - RLC_CGTT_MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); @@ -7486,12 +7678,50 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade } } +static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset FGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + /* update FGCG override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL); + /* unset RLC SRAM CLK GATER override */ + data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + /* update RLC SRAM CLK GATER override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data); + } else { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* reset FGCG bits */ + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + /* disable FGCG*/ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL); + /* reset RLC SRAM CLK GATER bits */ + data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + /* disable RLC SRAM CLK*/ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data); + } +} + static int 
gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable) { + /* enable FGCG firstly*/ + gfx_v10_0_update_fine_grain_clock_gating(adev, enable); /* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === */ @@ -7509,6 +7739,8 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v10_0_update_3d_clock_gating(adev, enable); /* === MGCG + MGLS === */ gfx_v10_0_update_medium_grain_clock_gating(adev, enable); + /* disable fgcg at last*/ + gfx_v10_0_update_fine_grain_clock_gating(adev, enable); } if (adev->cg_flags & @@ -7570,6 +7802,27 @@ static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offse return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0); } +static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + u32 data = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL); + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, data); +} + +static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + gfx_v10_cntl_power_gating(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, .set_safe_mode = gfx_v10_0_set_safe_mode, @@ -7615,8 +7868,12 @@ static int gfx_v10_0_set_powergating_state(void *handle, case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: amdgpu_gfx_off_ctrl(adev, enable); break; + case CHIP_VANGOGH: + gfx_v10_cntl_pg(adev, enable); + break; default: break; } @@ -7637,6 +7894,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -7651,6 +7910,11 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + /* AMD_CG_SUPPORT_GFX_FGCG */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_FGCG; + /* AMD_CG_SUPPORT_GFX_MGCG */ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) @@ -8400,6 +8664,7 @@ static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + break; default: break; } @@ -8739,6 +9004,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 79c52c7a02e3..ca74638dec9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1894,6 +1894,7 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * gfx_v6_0_ring_test_ib - basic ring IB test * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Allocate an IB and execute it on the gfx ring (SI). * Provides a basic gfx ring test to verify that IBs are working. @@ -3064,7 +3065,8 @@ static int gfx_v6_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + GFX6_NUM_COMPUTE_RINGS); adev->gfx.funcs = &gfx_v6_0_gfx_funcs; adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs; gfx_v6_0_set_ring_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 04eaf3a8fddb..a368724c3dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1580,10 +1580,10 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @se_num: shader engine to address * @sh_num: sh block to address + * @instance: Certain registers are instanced per SE or SH. + * 0xffffffff means broadcast to all SEs or SHs (CIK). * - * Select which SE, SH combinations to address. Certain - * registers are instanced per SE or SH. 0xffffffff means - * broadcast to all SEs or SHs (CIK). + * Select which SE, SH combinations to address. */ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) @@ -1779,8 +1779,6 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev, * gfx_v7_0_setup_rb - setup the RBs on the asic * * @adev: amdgpu_device pointer - * @se_num: number of SEs (shader engines) for the asic - * @sh_per_se: number of SH blocks per SE for the asic * * Configures per-SE/SH RB registers (CIK). */ @@ -1841,6 +1839,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +#define DEFAULT_SH_MEM_BASES (0x6000) /** * gfx_v7_0_init_compute_vmid - gart enable * @@ -1849,7 +1848,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) * Initialize compute vmid sh_mem registers * */ -#define DEFAULT_SH_MEM_BASES (0x6000) static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -1898,7 +1896,7 @@ static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. 
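The gfx_v7_0_select_se_sh() kernel-doc above spells out the broadcast convention: passing 0xffffffff for se_num, sh_num or instance addresses every SE/SH/instance at once. The caller below is made up for illustration, but the lock/select/restore bracket is the usual way per-SE register accesses are done around this helper.

/* Hypothetical caller, only to show the select/restore pattern. */
static void read_per_se_registers(struct amdgpu_device *adev, u32 se, u32 sh)
{
	mutex_lock(&adev->grbm_idx_mutex);

	/* address one specific SE/SH */
	gfx_v7_0_select_se_sh(adev, se, sh, 0xffffffff);
	/* ... read or program registers that are instanced per SE/SH ... */

	/* restore broadcast so later writes hit all SEs/SHs again */
	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	mutex_unlock(&adev->grbm_idx_mutex);
}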
*/ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); @@ -2074,7 +2072,6 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev) /** * gfx_v7_0_ring_test_ring - basic gfx ring test * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * * Allocate a scratch register and write to it using the gfx ring (CIK). @@ -2121,8 +2118,7 @@ error_free_scratch: /** * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp * - * @adev: amdgpu_device pointer - * @ridx: amdgpu ring index + * @ring: amdgpu_ring structure holding ring information * * Emits an hdp flush on the cp. */ @@ -2171,8 +2167,10 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) /** * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Emits a fence sequnce number on the gfx ring and flushes * GPU caches. @@ -2212,8 +2210,10 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, /** * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Emits a fence sequnce number on the compute ring and flushes * GPU caches. @@ -2245,7 +2245,9 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring * * @ring: amdgpu_ring structure holding ring information + * @job: job to retrive vmid from * @ib: amdgpu indirect buffer object + * @flags: options (AMDGPU_HAVE_CTX_SWITCH) * * Emits an DE (drawing engine) or CE (constant engine) IB * on the gfx ring. IBs are usually generated by userspace @@ -2342,6 +2344,7 @@ static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) * gfx_v7_0_ring_test_ib - basic ring IB test * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Allocate an IB and execute it on the gfx ring (CIK). * Provides a basic gfx ring test to verify that IBs are working. @@ -3234,7 +3237,9 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) /** * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP * - * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using the CP (CIK). 
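Many of these hunks manipulate register fields through REG_SET_FIELD()/REG_GET_FIELD() or through explicit _MASK/__SHIFT pairs such as the CG_ACLK_CNTL divider defines added in the gfx_v8_0 hunk just below. The macros are defined elsewhere in the driver; written out by hand for that divider field, the equivalent open-coded form is roughly:

uint32_t data, divider;

data = RREG32_SMC(ixCG_ACLK_CNTL);

/* roughly REG_GET_FIELD(data, CG_ACLK_CNTL, ACLK_DIVIDER) */
divider = (data & CG_ACLK_CNTL__ACLK_DIVIDER_MASK) >>
	  CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;

/* roughly REG_SET_FIELD(data, CG_ACLK_CNTL, ACLK_DIVIDER, 0x18) */
data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
data |= (0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT) &
	CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
WREG32_SMC(ixCG_ACLK_CNTL, data);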
@@ -4238,7 +4243,8 @@ static int gfx_v7_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); adev->gfx.funcs = &gfx_v7_0_gfx_funcs; adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; gfx_v7_0_set_ring_funcs(adev); @@ -5207,15 +5213,6 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->lds_size = 64; } -static const struct amdgpu_ip_block_version gfx_v7_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_GFX, - .major = 7, - .minor = 0, - .rev = 0, - .funcs = &gfx_v7_0_ip_funcs, -}; - const struct amdgpu_ip_block_version gfx_v7_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index 6fb9c1524691..eedce7d007f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -24,7 +24,6 @@ #ifndef __GFX_V7_0_H__ #define __GFX_V7_0_H__ -extern const struct amdgpu_ip_block_version gfx_v7_0_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 94b7e0531d09..37639214cbbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -729,8 +729,13 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); +#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK 0x0000007fL +#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT 0x00000000L + static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { + uint32_t data; + switch (adev->asic_type) { case CHIP_TOPAZ: amdgpu_device_program_register_sequence(adev, @@ -790,11 +795,14 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) amdgpu_device_program_register_sequence(adev, polaris10_golden_common_all, ARRAY_SIZE(polaris10_golden_common_all)); - WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); - if (adev->pdev->revision == 0xc7 && + data = RREG32_SMC(ixCG_ACLK_CNTL); + data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK; + data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT; + WREG32_SMC(ixCG_ACLK_CNTL, data); + if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) && ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || - (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { + (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) { amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); } @@ -1915,7 +1923,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? 
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -3678,6 +3687,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +#define DEFAULT_SH_MEM_BASES (0x6000) /** * gfx_v8_0_init_compute_vmid - gart enable * @@ -3686,7 +3696,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) * Initialize compute vmid sh_mem registers * */ -#define DEFAULT_SH_MEM_BASES (0x6000) static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -3740,7 +3749,7 @@ static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); @@ -4433,7 +4442,8 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -5058,7 +5068,7 @@ static int gfx_v8_0_pre_soft_reset(void *handle) gfx_v8_0_cp_compute_enable(adev, false); } - return 0; + return 0; } static int gfx_v8_0_soft_reset(void *handle) @@ -5295,7 +5305,8 @@ static int gfx_v8_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = amdgpu_num_kcq; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); adev->gfx.funcs = &gfx_v8_0_gfx_funcs; gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0d8e203b10ef..fc9bb94eaaf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -794,7 +794,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info); + struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); @@ -2228,7 +2228,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? 
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ return amdgpu_ring_init(adev, ring, 1024, @@ -2519,7 +2520,7 @@ static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); @@ -2992,7 +2993,7 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) } } -void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) { WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); gfx_v9_0_enable_gui_idle_interrupt(adev, false); @@ -3383,7 +3384,9 @@ static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *m struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, + ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -4633,7 +4636,8 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = amdgpu_num_kcq; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); @@ -5177,7 +5181,7 @@ static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); WDOORBELL64(ring->doorbell_index, ring->wptr); } else { WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); @@ -5363,7 +5367,7 @@ static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) /* XXX check if swapping is necessary on BE */ if (ring->use_doorbell) { - atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); WDOORBELL64(ring->doorbell_index, ring->wptr); } else{ BUG(); /* only DOORBELL method supported on gfx9 now */ @@ -5683,6 +5687,7 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index fad887a66886..6ddd53ba8b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "gfxhub_v1_0.h" +#include "gfxhub_v1_1.h" #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" @@ -30,13 +31,14 @@ #include "soc15_common.h" -u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) +static u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) { return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; } -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base) +static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -274,7 +276,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) } } -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) { /* @@ -304,7 +306,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) return 0; } -void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) +static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; @@ -335,8 +337,8 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @value: true redirects VM faults to the default page */ -void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, - bool value) +static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) { u32 tmp; tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); @@ -373,7 +375,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } -void gfxhub_v1_0_init(struct amdgpu_device *adev) +static void gfxhub_v1_0_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -412,4 +414,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .gart_disable = gfxhub_v1_0_gart_disable, .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, + .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 0c46672bbf49..3174bc5766fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -24,14 +24,6 @@ #ifndef __GFXHUB_V1_0_H__ #define __GFXHUB_V1_0_H__ -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); -void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); -void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, - bool value); -void gfxhub_v1_0_init(struct amdgpu_device *adev); -u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base); - extern const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c index 1e24b6d51e41..c0ab71df0d90 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -21,7 +21,6 @@ * */ 
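The gfxhub_v1_0 entry points above become static because, after this change, everything reaches them through the const struct amdgpu_gfxhub_funcs table (which now also carries gfxhub_v1_1_get_xgmi_info). Callers dispatch through adev->gfxhub.funcs, as the gmc_v10_0 code further down already does for get_mc_fb_offset(); a hypothetical caller of the GART hooks would look roughly like this.

/* Hypothetical caller, only to show the indirection; not taken from the tree. */
static int example_gart_bringup(struct amdgpu_device *adev)
{
	int r;

	r = adev->gfxhub.funcs->gart_enable(adev);   /* was gfxhub_v1_0_gart_enable() */
	if (r)
		return r;

	adev->gfxhub.funcs->set_fault_enable_default(adev, true);
	return 0;
}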
#include "amdgpu.h" -#include "gfxhub_v1_0.h" #include "gfxhub_v1_1.h" #include "gc/gc_9_2_1_offset.h" @@ -29,7 +28,7 @@ #include "soc15_common.h" -static int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) +int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) { u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL); u32 max_region = @@ -67,13 +66,3 @@ static int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) return 0; } - -const struct amdgpu_gfxhub_funcs gfxhub_v1_1_funcs = { - .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, - .setup_vm_pt_regs = gfxhub_v1_0_setup_vm_pt_regs, - .gart_enable = gfxhub_v1_0_gart_enable, - .gart_disable = gfxhub_v1_0_gart_disable, - .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, - .init = gfxhub_v1_0_init, - .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h index ae5759ffbee3..d753cf28a0a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h @@ -24,6 +24,6 @@ #ifndef __GFXHUB_V1_1_H__ #define __GFXHUB_V1_1_H__ -extern const struct amdgpu_gfxhub_funcs gfxhub_v1_1_funcs; +int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 456360bf58fa..2aecc6a243e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -153,16 +153,16 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; if (!amdgpu_sriov_vf(adev)) { - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 724bb29e9bb4..410fd3a1a388 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -152,16 +152,16 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. 
*/ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index dbc8b76b9b78..5648c48be77f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -46,6 +46,7 @@ #include "gfxhub_v2_0.h" #include "gfxhub_v2_1.h" #include "mmhub_v2_0.h" +#include "mmhub_v2_3.h" #include "athub_v2_0.h" #include "athub_v2_1.h" @@ -93,44 +94,72 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + bool retry_fault = !!(entry->src_data[1] & 0x80); struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + struct amdgpu_task_info task_info; uint32_t status = 0; u64 addr; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (retry_fault) { + /* Returning 1 here also prevents sending the IV to the KFD */ + + /* Process it onyl if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + entry->timestamp)) + return 1; + + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (in_interrupt()) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + return 1; + } + if (!amdgpu_sriov_vf(adev)) { /* * Issue a dummy read to wait for the status register to * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if (entry->vmid_src == AMDGPU_GFXHUB_0) + if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + (adev->asic_type < CHIP_SIENNA_CICHLID)) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } - if (printk_ratelimit()) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - - dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " - "for process %s pid %d thread %s pid %d)\n", - entry->vmid_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", - addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) - hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); - } + if (!printk_ratelimit()) + return 0; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + + dev_err(adev->dev, + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " + "for process %s pid %d thread %s pid %d)\n", + entry->vmid_src ? 
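The reworked gmc_v10_0_process_interrupt() above (and the matching gmc_v9_0 change later in this diff) handles GPU retry page faults in three ordered steps before anything is reported. Condensed to just those steps, with the same calls as in the hunk:

/* Condensed shape of the new retry-fault path; status readback and the
 * rate-limited reporting that follow are omitted here.
 */
if (retry_fault) {
	/* 1) drop duplicate faults for an address already being handled
	 *    (skipped for IVs that arrive on the soft IH ring)
	 */
	if (entry->ih != &adev->irq.ih_soft &&
	    amdgpu_gmc_filter_faults(adev, addr, entry->pasid, entry->timestamp))
		return 1;                   /* consumed; also keeps it from the KFD */

	/* 2) in interrupt context, delegate the IV to another IH ring
	 *    (presumably the soft ring checked in step 1)
	 */
	if (in_interrupt()) {
		amdgpu_irq_delegate(adev, entry, 8);
		return 1;
	}

	/* 3) in process context, try to resolve the fault by filling page tables */
	if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
		return 1;
}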
"mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vmid, + entry->pasid, task_info.process_name, task_info.tgid, + task_info.task_name, task_info.pid); + dev_err(adev->dev, " in page starting at address 0x%012llx from client %d\n", + addr, entry->client_id); + + if (!amdgpu_sriov_vf(adev)) + hub->vmhub_funcs->print_l2_protection_fault_status(adev, + status); return 0; } @@ -145,7 +174,7 @@ static const struct amdgpu_irq_src_funcs gmc_v10_0_ecc_funcs = { .process = amdgpu_umc_process_ecc_irq, }; - static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) +static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gmc.vm_fault.num_types = 1; adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs; @@ -231,7 +260,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ - if (vmhub == AMDGPU_GFXHUB_0) + if ((vmhub == AMDGPU_GFXHUB_0) && + (adev->asic_type < CHIP_SIENNA_CICHLID)) RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); /* Wait for ACK with a delay.*/ @@ -267,6 +297,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @vmhub: vmhub type + * @flush_type: the flush type * * Flush the TLB for the requested page table. */ @@ -359,6 +391,8 @@ error_alloc: * * @adev: amdgpu_device pointer * @pasid: pasid to be flush + * @flush_type: the flush type + * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB() * * Flush the TLB for the requested pasid. */ @@ -398,7 +432,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, return 0; } - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, &queried_pasid); @@ -483,7 +517,8 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid /* * PTE format on NAVI 10: * 63:59 reserved - * 58:57 reserved + * 58 reserved and for sienna_cichlid is used for MALL noalloc + * 57 reserved * 56 F * 55 L * 54 reserved @@ -631,7 +666,14 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) { - adev->mmhub.funcs = &mmhub_v2_0_funcs; + switch (adev->asic_type) { + case CHIP_VANGOGH: + adev->mmhub.funcs = &mmhub_v2_3_funcs; + break; + default: + adev->mmhub.funcs = &mmhub_v2_0_funcs; + break; + } } static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) @@ -639,6 +681,8 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfxhub.funcs = &gfxhub_v2_1_funcs; break; default: @@ -673,8 +717,6 @@ static int gmc_v10_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - amdgpu_bo_late_init(adev); - r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; @@ -698,6 +740,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc); + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); @@ -733,6 +776,13 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_base = 
pci_resource_start(adev->pdev, 0); adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); +#ifdef CONFIG_X86_64 + if (adev->flags & AMD_IS_APU) { + adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); + adev->gmc.aper_size = adev->gmc.real_vram_size; + } +#endif + /* In case the PCI BAR is larger than the actual amount of vram */ adev->gmc.visible_vram_size = adev->gmc.aper_size; if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size) @@ -746,6 +796,8 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -790,7 +842,10 @@ static int gmc_v10_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - if (adev->asic_type == CHIP_SIENNA_CICHLID && amdgpu_emu_mode == 1) { + if ((adev->flags & AMD_IS_APU) && amdgpu_emu_mode == 1) { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4; + adev->gmc.vram_width = 64; + } else if (amdgpu_emu_mode == 1) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6; adev->gmc.vram_width = 1 * 128; /* numchan * chansize */ } else { @@ -808,6 +863,8 @@ static int gmc_v10_0_sw_init(void *handle) case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -921,6 +978,8 @@ static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev) case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: break; default: break; @@ -1081,8 +1140,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle, if (r) return r; - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) + if (adev->asic_type >= CHIP_SIENNA_CICHLID && + adev->asic_type <= CHIP_DIMGREY_CAVEFISH) return athub_v2_1_set_clockgating(adev, state); else return athub_v2_0_set_clockgating(adev, state); @@ -1094,8 +1153,8 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) adev->mmhub.funcs->get_clockgating(adev, flags); - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) + if (adev->asic_type >= CHIP_SIENNA_CICHLID && + adev->asic_type <= CHIP_DIMGREY_CAVEFISH) athub_v2_1_get_clockgating(adev, flags); else athub_v2_0_get_clockgating(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 95a9117e9564..f5b69484c45a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -530,7 +530,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) * the VMs are determined by the application and setup and assigned * on the fly in the vm part of radeon_gart.c */ - for (i = 1; i < 16; i++) { + for (i = 1; i < AMDGPU_NUM_VMID; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, table_addr >> 12); @@ -791,8 +791,6 @@ static int gmc_v6_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_late_init(adev); - if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80c146df338a..dee2b34effb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -424,6 +424,8 @@ static int gmc_v7_0_mc_init(struct 
amdgpu_device *adev) * * @adev: amdgpu_device pointer * @pasid: pasid to be flush + * @flush_type: type of flush + * @all_hub: flush all hubs * * Flush the TLB for the requested pasid. */ @@ -463,7 +465,9 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush - * + * @vmhub: which hub to flush + * @flush_type: type of flush + * * * Flush the TLB for the requested page table (CIK). */ static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, @@ -673,7 +677,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) /* set vm size, must be a multiple of 4 */ WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1); - for (i = 1; i < 16; i++) { + for (i = 1; i < AMDGPU_NUM_VMID; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, table_addr >> 12); @@ -763,6 +767,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value + * @pasid: debug logging only - no functional use * * Print human readable fault information (CIK). */ @@ -956,8 +961,6 @@ static int gmc_v7_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_late_init(adev); - if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9ab65ca7df77..2d832fc23119 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -230,36 +230,20 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) chip_name = "tonga"; break; case CHIP_POLARIS11: - if (((adev->pdev->device == 0x67ef) && - ((adev->pdev->revision == 0xe0) || - (adev->pdev->revision == 0xe5))) || - ((adev->pdev->device == 0x67ff) && - ((adev->pdev->revision == 0xcf) || - (adev->pdev->revision == 0xef) || - (adev->pdev->revision == 0xff)))) - chip_name = "polaris11_k"; - else if ((adev->pdev->device == 0x67ef) && - (adev->pdev->revision == 0xe2)) + if (ASICID_IS_P21(adev->pdev->device, adev->pdev->revision) || + ASICID_IS_P31(adev->pdev->device, adev->pdev->revision)) chip_name = "polaris11_k"; else chip_name = "polaris11"; break; case CHIP_POLARIS10: - if ((adev->pdev->device == 0x67df) && - ((adev->pdev->revision == 0xe1) || - (adev->pdev->revision == 0xf7))) + if (ASICID_IS_P30(adev->pdev->device, adev->pdev->revision)) chip_name = "polaris10_k"; else chip_name = "polaris10"; break; case CHIP_POLARIS12: - if (((adev->pdev->device == 0x6987) && - ((adev->pdev->revision == 0xc0) || - (adev->pdev->revision == 0xc3))) || - ((adev->pdev->device == 0x6981) && - ((adev->pdev->revision == 0x00) || - (adev->pdev->revision == 0x01) || - (adev->pdev->revision == 0x10)))) + if (ASICID_IS_P23(adev->pdev->device, adev->pdev->revision)) chip_name = "polaris12_k"; else chip_name = "polaris12"; @@ -625,6 +609,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * @pasid: pasid to be flush + * @flush_type: type of flush + * @all_hub: flush all hubs * * Flush the TLB for the requested pasid. 
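In the gmc_v8_0_init_microcode() hunk above, the open-coded PCI device/revision lists that picked the "_k" firmware variants for Polaris are replaced by ASICID_IS_P21/P31/P30/P23 helpers. Their definitions are not part of this hunk, but the removed lines show what they must match; for example, the Polaris10 case presumably reduces to something like:

/* Not the in-tree definition; reconstructed from the removed checks purely
 * to show what ASICID_IS_P30() is expected to match (device 0x67DF,
 * revisions 0xE1 or 0xF7 previously selected polaris10_k firmware).
 */
#define ASICID_IS_P30(did, rid) \
	(((did) == 0x67DF) && (((rid) == 0xE1) || ((rid) == 0xF7)))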
*/ @@ -665,6 +651,8 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @vmhub: which hub to flush + * @flush_type: type of flush * * Flush the TLB for the requested page table (VI). */ @@ -915,7 +903,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) /* set vm size, must be a multiple of 4 */ WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1); - for (i = 1; i < 16; i++) { + for (i = 1; i < AMDGPU_NUM_VMID; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, table_addr >> 12); @@ -1006,6 +994,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value + * @pasid: debug logging only - no functional use * * Print human readable fault information (VI). */ @@ -1073,8 +1062,6 @@ static int gmc_v8_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_late_init(adev); - if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3ebbddb63705..e1531d97f486 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -379,41 +379,6 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { (0x001d43e0 + 0x00001800), }; -static const uint32_t ecc_umc_mcumc_status_addrs[] = { - (0x000143c2 + 0x00000000), - (0x000143c2 + 0x00000800), - (0x000143c2 + 0x00001000), - (0x000143c2 + 0x00001800), - (0x000543c2 + 0x00000000), - (0x000543c2 + 0x00000800), - (0x000543c2 + 0x00001000), - (0x000543c2 + 0x00001800), - (0x000943c2 + 0x00000000), - (0x000943c2 + 0x00000800), - (0x000943c2 + 0x00001000), - (0x000943c2 + 0x00001800), - (0x000d43c2 + 0x00000000), - (0x000d43c2 + 0x00000800), - (0x000d43c2 + 0x00001000), - (0x000d43c2 + 0x00001800), - (0x001143c2 + 0x00000000), - (0x001143c2 + 0x00000800), - (0x001143c2 + 0x00001000), - (0x001143c2 + 0x00001800), - (0x001543c2 + 0x00000000), - (0x001543c2 + 0x00000800), - (0x001543c2 + 0x00001000), - (0x001543c2 + 0x00001800), - (0x001943c2 + 0x00000000), - (0x001943c2 + 0x00000800), - (0x001943c2 + 0x00001000), - (0x001943c2 + 0x00001800), - (0x001d43c2 + 0x00000000), - (0x001d43c2 + 0x00000800), - (0x001d43c2 + 0x00001000), - (0x001d43c2 + 0x00001800), -}; - static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -502,6 +467,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, WREG32(reg, tmp); } } + break; default: break; } @@ -510,122 +476,136 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, } static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub; bool retry_fault = !!(entry->src_data[1] & 0x80); uint32_t status = 0, cid = 0, rw = 0; - u64 addr; - char hub_name[10]; + struct amdgpu_task_info task_info; + struct amdgpu_vmhub *hub; const char *mmhub_cid; + const char *hub_name; + u64 addr; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - if 
(retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid, - entry->timestamp)) - return 1; /* This also prevents sending it to KFD */ + if (retry_fault) { + /* Returning 1 here also prevents sending the IV to the KFD */ + + /* Process it onyl if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + entry->timestamp)) + return 1; + + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (in_interrupt()) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + return 1; + } + + if (!printk_ratelimit()) + return 0; if (entry->client_id == SOC15_IH_CLIENTID_VMC) { - snprintf(hub_name, sizeof(hub_name), "mmhub0"); + hub_name = "mmhub0"; hub = &adev->vmhub[AMDGPU_MMHUB_0]; } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { - snprintf(hub_name, sizeof(hub_name), "mmhub1"); + hub_name = "mmhub1"; hub = &adev->vmhub[AMDGPU_MMHUB_1]; } else { - snprintf(hub_name, sizeof(hub_name), "gfxhub0"); + hub_name = "gfxhub0"; hub = &adev->vmhub[AMDGPU_GFXHUB_0]; } - /* If it's the first fault for this address, process it normally */ - if (retry_fault && !in_interrupt() && - amdgpu_vm_handle_fault(adev, entry->pasid, addr)) - return 1; /* This also prevents sending it to KFD */ + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - if (!amdgpu_sriov_vf(adev)) { - /* - * Issue a dummy read to wait for the status register to - * be updated to avoid reading an incorrect value due to - * the new fast GRBM interface. - */ - if (entry->vmid_src == AMDGPU_GFXHUB_0) - RREG32(hub->vm_l2_pro_fault_status); - - status = RREG32(hub->vm_l2_pro_fault_status); - cid = REG_GET_FIELD(status, - VM_L2_PROTECTION_FAULT_STATUS, CID); - rw = REG_GET_FIELD(status, - VM_L2_PROTECTION_FAULT_STATUS, RW); - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - } + dev_err(adev->dev, + "[%s] %s page fault (src_id:%u ring:%u vmid:%u " + "pasid:%u, for process %s pid %d thread %s pid %d)\n", + hub_name, retry_fault ? "retry" : "no-retry", + entry->src_id, entry->ring_id, entry->vmid, + entry->pasid, task_info.process_name, task_info.tgid, + task_info.task_name, task_info.pid); + dev_err(adev->dev, " in page starting at address 0x%012llx from client %d\n", + addr, entry->client_id); - if (printk_ratelimit()) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - - dev_err(adev->dev, - "[%s] %s page fault (src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d)\n", - hub_name, retry_fault ? "retry" : "no-retry", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", - addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) { - dev_err(adev->dev, - "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", - status); - if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", - cid >= ARRAY_SIZE(gfxhub_client_ids) ? 
"unknown" : gfxhub_client_ids[cid], - cid); - } else { - switch (adev->asic_type) { - case CHIP_VEGA10: - mmhub_cid = mmhub_client_ids_vega10[cid][rw]; - break; - case CHIP_VEGA12: - mmhub_cid = mmhub_client_ids_vega12[cid][rw]; - break; - case CHIP_VEGA20: - mmhub_cid = mmhub_client_ids_vega20[cid][rw]; - break; - case CHIP_ARCTURUS: - mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; - break; - case CHIP_RAVEN: - mmhub_cid = mmhub_client_ids_raven[cid][rw]; - break; - case CHIP_RENOIR: - mmhub_cid = mmhub_client_ids_renoir[cid][rw]; - break; - default: - mmhub_cid = NULL; - break; - } - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", - mmhub_cid ? mmhub_cid : "unknown", cid); - } - dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", - REG_GET_FIELD(status, - VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); - dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", - REG_GET_FIELD(status, - VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); - dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", - REG_GET_FIELD(status, - VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); - dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", - REG_GET_FIELD(status, - VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); - dev_err(adev->dev, "\t RW: 0x%x\n", rw); + if (amdgpu_sriov_vf(adev)) + return 0; + + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + + status = RREG32(hub->vm_l2_pro_fault_status); + cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID); + rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + + dev_err(adev->dev, + "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : + gfxhub_client_ids[cid], + cid); + } else { + switch (adev->asic_type) { + case CHIP_VEGA10: + mmhub_cid = mmhub_client_ids_vega10[cid][rw]; + break; + case CHIP_VEGA12: + mmhub_cid = mmhub_client_ids_vega12[cid][rw]; + break; + case CHIP_VEGA20: + mmhub_cid = mmhub_client_ids_vega20[cid][rw]; + break; + case CHIP_ARCTURUS: + mmhub_cid = mmhub_client_ids_arcturus[cid][rw]; + break; + case CHIP_RAVEN: + mmhub_cid = mmhub_client_ids_raven[cid][rw]; + break; + case CHIP_RENOIR: + mmhub_cid = mmhub_client_ids_renoir[cid][rw]; + break; + default: + mmhub_cid = NULL; + break; } + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + mmhub_cid ? mmhub_cid : "unknown", cid); } - + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%x\n", rw); return 0; } @@ -711,6 +691,7 @@ static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @vmhub: which hub to flush * @flush_type: the flush type * * Flush the TLB for the requested page table using certain type. 
@@ -827,6 +808,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * * @adev: amdgpu_device pointer * @pasid: pasid to be flush + * @flush_type: the flush type + * @all_hub: flush all hubs * * Flush the TLB for the requested pasid. */ @@ -1166,15 +1149,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_ARCTURUS: - case CHIP_VEGA20: - adev->gfxhub.funcs = &gfxhub_v1_1_funcs; - break; - default: - adev->gfxhub.funcs = &gfxhub_v1_0_funcs; - break; - } + adev->gfxhub.funcs = &gfxhub_v1_0_funcs; } static int gmc_v9_0_early_init(void *handle) @@ -1202,8 +1177,6 @@ static int gmc_v9_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - amdgpu_bo_late_init(adev); - r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index a13dd9a51149..37d8b6ca4dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -179,6 +179,7 @@ static void iceland_ih_irq_disable(struct amdgpu_device *adev) * iceland_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VI). Also check for @@ -213,6 +214,8 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, * iceland_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -245,6 +248,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, * iceland_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index c600b61b5f45..7332a320ede8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -210,7 +210,9 @@ static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring) * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -282,7 +284,9 @@ static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer. 
*/ @@ -511,6 +515,7 @@ void jpeg_v1_0_sw_fini(void *handle) * jpeg_v1_0_start - start JPEG block * * @adev: amdgpu_device pointer + * @mode: SPG or DPG mode * * Setup and start the JPEG block */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 94caf5204c8b..3b22953aa62e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -32,19 +32,19 @@ #include "vcn/vcn_2_0_0_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" -#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff +#define mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET 0x1bfff #define mmUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET 0x4029 #define mmUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET 0x402a #define mmUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET 0x402b #define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ea -#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40eb #define mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET 0x40cf #define mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET 0x40d1 -#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 +#define mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40e8 #define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40e9 #define mmUVD_JRBC_IB_SIZE_INTERNAL_OFFSET 0x4082 #define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET 0x40ec -#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x40ed #define mmUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET 0x4085 #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 @@ -247,7 +247,7 @@ static int jpeg_v2_0_disable_power_gating(struct amdgpu_device *adev) return 0; } -static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev) +static int jpeg_v2_0_enable_power_gating(struct amdgpu_device *adev) { if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { uint32_t data; @@ -274,7 +274,7 @@ static int jpeg_v2_0_enable_power_gating(struct amdgpu_device* adev) return 0; } -static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device *adev) { uint32_t data; @@ -297,7 +297,7 @@ static void jpeg_v2_0_disable_clock_gating(struct amdgpu_device* adev) WREG32_SOC15(JPEG, 0, mmJPEG_CGC_GATE, data); } -static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev) { uint32_t data; @@ -489,7 +489,9 @@ void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) * jpeg_v2_0_dec_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -538,7 +540,9 @@ void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 845306f63cdb..c6724a0e0c43 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -247,7 +247,7 @@ static int jpeg_v2_5_resume(void *handle) return r; } -static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst) +static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data; @@ -276,7 +276,7 @@ static void jpeg_v2_5_disable_clock_gating(struct amdgpu_device* adev, int inst) WREG32_SOC15(JPEG, inst, mmJPEG_CGC_CTRL, data); } -static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device* adev, int inst) +static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 3a0dff53654d..e8fbb2a0de34 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -213,7 +213,7 @@ static int jpeg_v3_0_resume(void *handle) return r; } -static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device *adev) { uint32_t data = 0; @@ -243,7 +243,7 @@ static void jpeg_v3_0_disable_clock_gating(struct amdgpu_device* adev) WREG32_SOC15(JPEG, 0, mmJPEG_CGC_CTRL, data); } -static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device* adev) +static void jpeg_v3_0_enable_clock_gating(struct amdgpu_device *adev) { uint32_t data = 0; @@ -286,7 +286,7 @@ static int jpeg_v3_0_disable_static_power_gating(struct amdgpu_device *adev) return 0; } -static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device* adev) +static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev) { /* enable anti hang mechanism */ WREG32_P(SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_POWER_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 1c22d8393b21..985e454463e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -46,7 +46,7 @@ static void mes_v10_1_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring->use_doorbell) { - atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr); WDOORBELL64(ring->doorbell_index, ring->wptr); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index f84701c562bf..d7b39c07de20 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -344,7 +344,7 @@ static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) u32 i; /* Disable all tables */ - for (i = 0; i < 16; i++) + for (i = 0; i < AMDGPU_NUM_VMID; i++) WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); @@ -409,7 +409,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool CRASH_ON_NO_RETRY_FAULT, 1); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, CRASH_ON_RETRY_FAULT, 1); - } + } WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } @@ -712,7 +712,7 @@ static int mmhub_v1_0_get_ras_error_count(struct amdgpu_device *adev, uint32_t sec_cnt, ded_cnt; for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_ras_fields); i++) { - if(mmhub_v1_0_ras_fields[i].reg_offset != reg->reg_offset) + if (mmhub_v1_0_ras_fields[i].reg_offset != 
reg->reg_offset) continue; sec_cnt = (value & diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 2063700f0bc6..092ff2c43658 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -195,17 +195,17 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; uint32_t tmp; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); if (!amdgpu_sriov_vf(adev)) { /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); } /* Set default page address. */ @@ -421,7 +421,7 @@ static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) u32 i; /* Disable all tables */ - for (i = 0; i < 16; i++) + for (i = 0; i < AMDGPU_NUM_VMID; i++) WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); @@ -543,6 +543,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; @@ -576,6 +577,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: if (def != data) WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); if (def1 != data1) @@ -598,6 +600,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); break; default: @@ -614,6 +617,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data); break; default: @@ -635,6 +639,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: mmhub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v2_0_update_medium_grain_light_sleep(adev, @@ -657,6 +662,7 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid); data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c new file mode 100644 index 000000000000..b72c8e4ca36b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ 
-0,0 +1,589 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "mmhub_v2_3.h" + +#include "mmhub/mmhub_2_3_0_offset.h" +#include "mmhub/mmhub_2_3_0_sh_mask.h" +#include "mmhub/mmhub_2_3_0_default.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +static const char *mmhub_client_ids_vangogh[][2] = { + [0][0] = "MP0", + [1][0] = "MP1", + [2][0] = "DCEDMC", + [3][0] = "DCEVGA", + [13][0] = "UTCL2", + [26][0] = "OSS", + [27][0] = "HDP", + [28][0] = "VCN", + [29][0] = "VCNU", + [30][0] = "JPEG", + [0][1] = "MP0", + [1][1] = "MP1", + [2][1] = "DCEDMC", + [3][1] = "DCEVGA", + [4][1] = "DCEDWB", + [5][1] = "XDP", + [26][1] = "OSS", + [27][1] = "HDP", + [28][1] = "VCN", + [29][1] = "VCNU", + [30][1] = "JPEG", +}; + +static uint32_t mmhub_v2_3_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + uint32_t cid, rw; + const char *mmhub_cid = NULL; + + cid = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, CID); + rw = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, RW); + + dev_err(adev->dev, + "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + switch (adev->asic_type) { + case CHIP_VANGOGH: + mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; + break; + default: + mmhub_cid = NULL; + break; + } + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + mmhub_cid ? 
mmhub_cid : "unknown", cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%x\n", rw); +} + +static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); +} + +static void mmhub_v2_3_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v2_3_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v2_3_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v2_3_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp); + + tmp = mmMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp); + + tmp = mmMMVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL4, tmp); + + tmp = mmMMVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); +} + +static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, tmp); +} + +static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(MMHUB, 
0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + mmMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} + +static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, + mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * MMMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. 
*/ + mmhub_v2_3_init_gart_aperture_regs(adev); + mmhub_v2_3_init_system_aperture_regs(adev); + mmhub_v2_3_init_tlb_regs(adev); + mmhub_v2_3_init_cache_regs(adev); + + mmhub_v2_3_enable_system_domain(adev); + mmhub_v2_3_disable_identity_aperture(adev); + mmhub_v2_3_setup_vmid_config(adev); + mmhub_v2_3_program_invalidation(adev); + + return 0; +} + +static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < AMDGPU_NUM_VMID; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v2_3_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void mmhub_v2_3_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = { + .print_l2_protection_fault_status = mmhub_v2_3_print_l2_protection_fault_status, + .get_invalidate_req = mmhub_v2_3_get_invalidate_req, +}; + +static void mmhub_v2_3_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + 
SOC15_REG_OFFSET(MMHUB, 0, + mmMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = mmMMVM_CONTEXT1_CNTL - mmMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = mmMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = mmMMVM_INVALIDATE_ENG1_REQ - + mmMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = mmMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + mmMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &mmhub_v2_3_vmhub_funcs; +} + +static void +mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1; + + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + + data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + + } else { + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + if (def1 != data1) + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); +} + +static void +mmhub_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); +} + +static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + mmhub_v2_3_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + mmhub_v2_3_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + + return 0; +} + +static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data, data1; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} + +const struct amdgpu_mmhub_funcs mmhub_v2_3_funcs = { + .ras_late_init = amdgpu_mmhub_ras_late_init, + .init = mmhub_v2_3_init, + .gart_enable = mmhub_v2_3_gart_enable, + .set_fault_enable_default = mmhub_v2_3_set_fault_enable_default, + .gart_disable = mmhub_v2_3_gart_disable, + .set_clockgating = mmhub_v2_3_set_clockgating, + .get_clockgating = mmhub_v2_3_get_clockgating, + .setup_vm_pt_regs = mmhub_v2_3_setup_vm_pt_regs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h new file mode 100644 index 000000000000..2926d21dea8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.h @@ -0,0 +1,28 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __MMHUB_V2_3_H__ +#define __MMHUB_V2_3_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v2_3_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 66748bb01b52..4a31737b6bb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -405,7 +405,7 @@ static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) for (j = 0; j < MMHUB_NUM_INSTANCES; j++) { /* Disable all tables */ - for (i = 0; i < 16; i++) + for (i = 0; i < AMDGPU_NUM_VMID; i++) WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, j * MMHUB_INSTANCE_REGISTER_OFFSET + diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 74b1e7dc49a9..7ba229e43799 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -136,6 +136,9 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev) } adev->irq.ih2.enabled = true; } + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; } /** @@ -314,6 +317,8 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_Sienna_Cichlid); ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); @@ -440,6 +445,7 @@ static void navi10_ih_irq_disable(struct amdgpu_device *adev) * navi10_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (NAVI10). Also check for @@ -500,6 +506,8 @@ out: * navi10_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -543,6 +551,7 @@ static void navi10_ih_decode_iv(struct amdgpu_device *adev, * navi10_ih_irq_rearm - rearm IRQ if lost * * @adev: amdgpu_device pointer + * @ih: IH ring to match * */ static void navi10_ih_irq_rearm(struct amdgpu_device *adev, @@ -576,6 +585,7 @@ static void navi10_ih_irq_rearm(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * + * @ih: IH ring buffer to set rptr * Set the IH ring buffer rptr. */ static void navi10_ih_set_rptr(struct amdgpu_device *adev, @@ -660,8 +670,11 @@ static int navi10_ih_sw_init(void *handle) /* use gpu virtual address for ih ring * until ih_checken is programmed to allow * use bus address for ih ring by psp bl */ - use_bus_addr = - (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? 
false : true; + if ((adev->flags & AMD_IS_APU) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + use_bus_addr = false; + else + use_bus_addr = true; r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); if (r) return r; @@ -690,6 +703,10 @@ static int navi10_ih_sw_init(void *handle) (adev->doorbell_index.ih + 2) << 1; } + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + r = amdgpu_irq_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 7429f30398b9..b5c3db16c2b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -28,10 +28,12 @@ #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#include <linux/pci.h> #define smnPCIE_CONFIG_CNTL 0x11180044 #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 +#define smnPCIE_LC_CNTL 0x11140280 #define mmBIF_SDMA2_DOORBELL_RANGE 0x01d6 #define mmBIF_SDMA2_DOORBELL_RANGE_BASE_IDX 2 @@ -51,8 +53,17 @@ static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + u32 tmp; + /* + * guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * therefore we force rev_id to 0 (which is the default value) + */ + if (amdgpu_sriov_vf(adev)) { + return 0; + } + + tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -312,6 +323,42 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); } +#define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1 +#define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x00000009 // 1=1us, 9=1ms +#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 4ms + +static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); + + if (enable) { + /* Disable ASPM L0s/L1 first */ + data &= ~(PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK | PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK); + + data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + + if (pci_is_thunderbolt_attached(adev->pdev)) + data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + else + data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + } else { + /* Disable ASPM L1 */ + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + /* Disable ASPM TxL0s */ + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + /* Disable ACPI L1 */ + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + } + + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL, data); +} + const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset, @@ -332,4 +379,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .ih_control = nbio_v2_3_ih_control, .init_registers = nbio_v2_3_init_registers, .remap_hdp_registers = nbio_v2_3_remap_hdp_registers, + .enable_aspm = nbio_v2_3_enable_aspm, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 
7b2fb050407d..d2f1fe55d388 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -32,7 +32,7 @@ static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -114,7 +114,7 @@ static void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *ad static void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index) { - u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE); if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index d34628e113fc..ae685813c419 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -43,7 +43,7 @@ static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev) static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -126,7 +126,7 @@ static void nbio_v7_0_enable_doorbell_selfring_aperture(struct amdgpu_device *ad static void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index) { - u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE); if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c new file mode 100644 index 000000000000..aa36022670f9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -0,0 +1,341 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v7_2.h" + +#include "nbio/nbio_7_2_0_offset.h" +#include "nbio/nbio_7_2_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbio_v7_2_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, + BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); +} + +static void nbio_v7_2_hdp_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + else + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE); +} + +static void nbio_v7_2_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, + int doorbell_size) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_SDMA0_DOORBELL_RANGE, + OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_SDMA0_DOORBELL_RANGE, + SIZE, doorbell_size); + } else { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_SDMA0_DOORBELL_RANGE, + SIZE, 0); + } + + WREG32_PCIE_PORT(reg, doorbell_range); +} + +static void nbio_v7_2_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8); + } else { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0); + } + + WREG32_PCIE_PORT(reg, doorbell_range); +} + +static void nbio_v7_2_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 reg; + + reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN); + reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, + BIF_DOORBELL_APER_EN, enable ? 
1 : 0); + + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg); +} + +static void nbio_v7_2_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, + regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, + regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + tmp); +} + + +static void nbio_v7_2_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE)); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC0_BIF_IH_DOORBELL_RANGE, OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC0_BIF_IH_DOORBELL_RANGE, SIZE, + 2); + } else { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC0_BIF_IH_DOORBELL_RANGE, SIZE, + 0); + } + + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE), + ih_doorbell_range); +} + +static void nbio_v7_2_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, + adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL); + /* + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl); +} + +static void nbio_v7_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL)); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL), data); +} + +static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); + if (enable && (adev->cg_flags & 
AMD_CG_SUPPORT_BIF_LS)) { + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data); +} + +static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regCPM_CONTROL)); + if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); + if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +static u32 nbio_v7_2_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v7_2_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v7_2_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_INDEX2); +} + +static u32 nbio_v7_2_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX0_PCIE_DATA2); +} + +static u32 nbio_v7_2_get_pcie_port_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); +} + +static u32 nbio_v7_2_get_pcie_port_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); +} + +const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbio_v7_2_init_registers(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL)); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), + data); +} + +const struct amdgpu_nbio_funcs nbio_v7_2_funcs = { + .get_hdp_flush_req_offset = nbio_v7_2_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v7_2_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v7_2_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v7_2_get_pcie_data_offset, + .get_pcie_port_index_offset = nbio_v7_2_get_pcie_port_index_offset, + .get_pcie_port_data_offset = nbio_v7_2_get_pcie_port_data_offset, + .get_rev_id = nbio_v7_2_get_rev_id, + .mc_access_enable = nbio_v7_2_mc_access_enable, + .hdp_flush = 
nbio_v7_2_hdp_flush, + .get_memsize = nbio_v7_2_get_memsize, + .sdma_doorbell_range = nbio_v7_2_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_2_vcn_doorbell_range, + .enable_doorbell_aperture = nbio_v7_2_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v7_2_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v7_2_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_2_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_2_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_2_get_clockgating_state, + .ih_control = nbio_v7_2_ih_control, + .init_registers = nbio_v7_2_init_registers, + .remap_hdp_registers = nbio_v7_2_remap_hdp_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h new file mode 100644 index 000000000000..a8e8e65648a0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.h @@ -0,0 +1,32 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __NBIO_V7_2_H__ +#define __NBIO_V7_2_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbio_v7_2_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 8eeba8096493..ac02dd707c44 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -49,6 +49,7 @@ #include "gfxhub_v2_0.h" #include "mmhub_v2_0.h" #include "nbio_v2_3.h" +#include "nbio_v7_2.h" #include "nv.h" #include "navi10_ih.h" #include "gfx_v10_0.h" @@ -95,6 +96,21 @@ static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg) return amdgpu_device_indirect_rreg64(adev, address, data, reg); } +static u32 nv_pcie_port_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + address = adev->nbio.funcs->get_pcie_port_index_offset(adev); + data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg * 4); + (void)RREG32(address); + r = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) { unsigned long address, data; @@ -105,6 +121,21 @@ static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) amdgpu_device_indirect_wreg64(adev, address, data, reg, v); } +static void nv_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_port_index_offset(adev); + data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg * 4); + (void)RREG32(address); + WREG32(data, v); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -254,7 +285,8 @@ static int nv_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(nv_allowed_read_registers); i++) { en = &nv_allowed_read_registers[i]; - if (reg_offset != + if ((i == 7 && (adev->sdma.num_instances == 1)) || /* some asics don't have SDMA1 */ + reg_offset != (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset)) continue; @@ -443,6 +475,12 @@ legacy_init: case CHIP_NAVY_FLOUNDER: sienna_cichlid_reg_base_init(adev); break; + case CHIP_VANGOGH: + vangogh_reg_base_init(adev); + break; + case CHIP_DIMGREY_CAVEFISH: + dimgrey_cavefish_reg_base_init(adev); + break; default: return -EINVAL; } @@ -468,8 +506,13 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) { int r; - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + if (adev->flags & AMD_IS_APU) { + adev->nbio.funcs = &nbio_v7_2_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; + } else { + adev->nbio.funcs = &nbio_v2_3_funcs; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; + } if (adev->asic_type == CHIP_SIENNA_CICHLID) adev->gmc.xgmi.supported = true; @@ -535,7 +578,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || 
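nv_pcie_port_rreg()/nv_pcie_port_wreg() above reach the PCIe port registers through an index/data pair rather than a flat MMIO window: the index register selects the target (note the dword-to-byte reg * 4 conversion and the read-back that posts the select), and the whole sequence runs under adev->pcie_idx_lock. A rough userspace model of that access pattern, with a fake register file standing in for the hardware and the locking omitted:

#include <stdint.h>
#include <stdio.h>

/* Fake "remote" register file reached through an index/data pair. */
static uint32_t remote_regs[64];
static uint32_t index_reg;                 /* byte offset of the selected register */

static void     wreg32(uint32_t *r, uint32_t v) { *r = v; }
static uint32_t rreg32(const uint32_t *r)       { return *r; }

static uint32_t port_rreg(uint32_t reg)
{
    /* the driver additionally holds adev->pcie_idx_lock around this sequence */
    wreg32(&index_reg, reg * 4);           /* select the register (dword index to byte offset) */
    (void)rreg32(&index_reg);              /* read back so the select has landed */
    return remote_regs[index_reg / 4];     /* read through the data port */
}

static void port_wreg(uint32_t reg, uint32_t v)
{
    wreg32(&index_reg, reg * 4);
    (void)rreg32(&index_reg);
    remote_regs[index_reg / 4] = v;        /* write through the data port */
}

int main(void)
{
    port_wreg(3, 0xdeadbeef);
    printf("reg3 = 0x%08x\n", port_rreg(3));
    return 0;
}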
amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -575,6 +618,44 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); break; + case CHIP_VANGOGH: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + break; + case CHIP_DIMGREY_CAVEFISH: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); + break; default: return -EINVAL; } @@ -671,6 +752,29 @@ static void nv_pre_asic_init(struct amdgpu_device *adev) { } +static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, + bool enter) +{ + if (enter) + amdgpu_gfx_rlc_enter_safe_mode(adev); + else + amdgpu_gfx_rlc_exit_safe_mode(adev); + + if (adev->gfx.funcs->update_perfmon_mgcg) + adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); + + /* + * The ASPM function is not fully enabled and verified on + * Navi yet. Temporarily skip this until ASPM enabled. 
+ */ +#if 0 + if (adev->nbio.funcs->enable_aspm) + adev->nbio.funcs->enable_aspm(adev, !enter); +#endif + + return 0; +} + static const struct amdgpu_asic_funcs nv_asic_funcs = { .read_disabled_bios = &nv_read_disabled_bios, @@ -691,6 +795,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .get_pcie_replay_count = &nv_get_pcie_replay_count, .supports_baco = &nv_asic_supports_baco, .pre_asic_init = &nv_pre_asic_init, + .update_umd_stable_pstate = &nv_update_umd_stable_pstate, }; static int nv_common_early_init(void *handle) @@ -706,6 +811,8 @@ static int nv_common_early_init(void *handle) adev->pcie_wreg = &nv_pcie_wreg; adev->pcie_rreg64 = &nv_pcie_rreg64; adev->pcie_wreg64 = &nv_pcie_wreg64; + adev->pciep_rreg = &nv_pcie_port_rreg; + adev->pciep_wreg = &nv_pcie_port_wreg; /* TODO: will add them during VCN v2 implementation */ adev->uvd_ctx_rreg = NULL; @@ -833,6 +940,46 @@ static int nv_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x32; break; + case CHIP_VANGOGH: + adev->apu_flags |= AMD_APU_IS_VANGOGH; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + if (adev->apu_flags & AMD_APU_IS_VANGOGH) + adev->external_rev_id = adev->rev_id + 0x01; + break; + case CHIP_DIMGREY_CAVEFISH: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_IH_CG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_ATHUB | + AMD_PG_SUPPORT_MMHUB; + adev->external_rev_id = adev->rev_id + 0x3c; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1060,6 +1207,7 @@ static int nv_common_set_clockgating_state(void *handle, case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.h b/drivers/gpu/drm/amd/amdgpu/nv.h index aeef50a6a54b..515d67bf249f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.h +++ b/drivers/gpu/drm/amd/amdgpu/nv.h @@ -34,4 +34,6 @@ int navi10_reg_base_init(struct amdgpu_device *adev); int navi14_reg_base_init(struct amdgpu_device *adev); int navi12_reg_base_init(struct amdgpu_device *adev); int sienna_cichlid_reg_base_init(struct amdgpu_device *adev); +void vangogh_reg_base_init(struct amdgpu_device *adev); +int dimgrey_cavefish_reg_base_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 4137dc710aaf..d65a5339d354 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -86,21 +86,22 @@ struct psp_gfx_ctrl /* TEE Gfx Command IDs for the ring buffer interface. 
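nv_update_umd_stable_pstate() above is deliberately symmetric: entering the stable pstate puts the RLC into safe mode and turns perfmon clock gating off, leaving it does the reverse, the gfx hook is only called when the ASIC provides one, and the ASPM toggle stays behind #if 0 until it is validated on Navi. A condensed model of that shape, with stand-in hooks:

#include <stdbool.h>
#include <stdio.h>

struct gfx_funcs {
    void (*update_perfmon_mgcg)(bool enable);   /* optional per-ASIC hook, may be NULL */
};

static void perfmon_mgcg(bool enable)
{
    printf("perfmon MGCG %s\n", enable ? "on" : "off");
}

static struct gfx_funcs funcs = { .update_perfmon_mgcg = perfmon_mgcg };

static void rlc_enter_safe_mode(void) { printf("RLC safe mode on\n"); }
static void rlc_exit_safe_mode(void)  { printf("RLC safe mode off\n"); }

static int update_umd_stable_pstate(bool enter)
{
    if (enter)
        rlc_enter_safe_mode();
    else
        rlc_exit_safe_mode();

    /* perfmon clock gating is disabled for as long as the stable pstate is held */
    if (funcs.update_perfmon_mgcg)
        funcs.update_perfmon_mgcg(!enter);

    return 0;
}

int main(void)
{
    update_umd_stable_pstate(true);
    update_umd_stable_pstate(false);
    return 0;
}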
*/ enum psp_gfx_cmd_id { - GFX_CMD_ID_LOAD_TA = 0x00000001, /* load TA */ - GFX_CMD_ID_UNLOAD_TA = 0x00000002, /* unload TA */ - GFX_CMD_ID_INVOKE_CMD = 0x00000003, /* send command to TA */ - GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ - GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ - GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ - GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ - GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ - GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ - GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ - GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ - GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */ + GFX_CMD_ID_LOAD_TA = 0x00000001, /* load TA */ + GFX_CMD_ID_UNLOAD_TA = 0x00000002, /* unload TA */ + GFX_CMD_ID_INVOKE_CMD = 0x00000003, /* send command to TA */ + GFX_CMD_ID_LOAD_ASD = 0x00000004, /* load ASD Driver */ + GFX_CMD_ID_SETUP_TMR = 0x00000005, /* setup TMR region */ + GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ + GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ + GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ + GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ + GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ + GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ + GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */ + GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ - GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ - GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */ + GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ + GFX_CMD_ID_AUTOLOAD_RLC = 0x00000021, /* Indicates all graphics fw loaded, start RLC autoload */ }; /* Command to load Trusted Application binary into PSP OS. */ @@ -285,6 +286,25 @@ union psp_gfx_commands struct psp_gfx_cmd_load_toc cmd_load_toc; }; +struct psp_gfx_uresp_reserved +{ + uint32_t reserved[8]; +}; + +/* Command-specific response for Fw Attestation Db */ +struct psp_gfx_uresp_fwar_db_info +{ + uint32_t fwar_db_addr_lo; + uint32_t fwar_db_addr_hi; +}; + +/* Union of command-specific responses for GPCOM ring. */ +union psp_gfx_uresp +{ + struct psp_gfx_uresp_reserved reserved; + struct psp_gfx_uresp_fwar_db_info fwar_db_info; +}; + /* Structure of GFX Response buffer. * For GPCOM I/F it is part of GFX_CMD_RESP buffer, for RBI * it is separate buffer. 
@@ -297,9 +317,11 @@ struct psp_gfx_resp uint32_t fw_addr_hi; /* +12 bits [63:32] of FW address within TMR (in response to cmd_load_ip_fw command) */ uint32_t tmr_size; /* +16 size of the TMR to be reserved including MM fw and Gfx fw in response to cmd_load_toc command */ - uint32_t reserved[3]; + uint32_t reserved[11]; + + union psp_gfx_uresp uresp; /* +64 response union containing command-specific responses */ - /* total 32 bytes */ + /* total 96 bytes */ }; /* Structure of Command buffer pointed by psp_gfx_rb_frame.cmd_buf_addr_hi diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 6c5d9612abcb..bd4248c93c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -59,6 +59,10 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_asd.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_toc.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sos.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -77,7 +81,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; const char *chip_name; - char fw_name[30]; + char fw_name[PSP_FW_NAME_LEN]; int err = 0; const struct ta_firmware_header_v1_0 *ta_hdr; @@ -105,24 +109,26 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVY_FLOUNDER: chip_name = "navy_flounder"; break; + case CHIP_VANGOGH: + chip_name = "vangogh"; + break; + case CHIP_DIMGREY_CAVEFISH: + chip_name = "dimgrey_cavefish"; + break; default: BUG(); } - err = psp_init_sos_microcode(psp, chip_name); - if (err) - return err; - - if (adev->asic_type != CHIP_SIENNA_CICHLID && - adev->asic_type != CHIP_NAVY_FLOUNDER) { - err = psp_init_asd_microcode(psp, chip_name); - if (err) - return err; - } switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: + err = psp_init_sos_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_asd_microcode(psp, chip_name); + if (err) + return err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); if (err) { @@ -150,6 +156,12 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + err = psp_init_sos_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_asd_microcode(psp, chip_name); + if (err) + return err; if (amdgpu_sriov_vf(adev)) break; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); @@ -180,7 +192,19 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: - err = psp_init_ta_microcode(&adev->psp, chip_name); + case CHIP_DIMGREY_CAVEFISH: + err = psp_init_sos_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_ta_microcode(psp, chip_name); + if (err) + return err; + break; + case CHIP_VANGOGH: + err = psp_init_asd_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_toc_microcode(psp, chip_name); if (err) return err; break; @@ -196,7 +220,7 @@ out2: return err; } -int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -407,8 +431,8 @@ static int 
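Growing reserved[] from 3 to 11 dwords is what lands the new command-specific union at a fixed +64 offset and takes the response from 32 to 96 bytes. A small layout check bears out the arithmetic; the field names ahead of fw_addr_hi are assumptions, since the hunk only shows the tail of the struct:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct gfx_uresp_fwar_db_info {
    uint32_t fwar_db_addr_lo;
    uint32_t fwar_db_addr_hi;
};

union gfx_uresp {
    uint32_t reserved[8];
    struct gfx_uresp_fwar_db_info fwar_db_info;
};

struct gfx_resp {
    uint32_t status;        /* assumed */
    uint32_t session_id;    /* assumed */
    uint32_t fw_addr_lo;    /* assumed */
    uint32_t fw_addr_hi;    /* +12 */
    uint32_t tmr_size;      /* +16 */
    uint32_t reserved[11];  /* grown from 3 so that uresp starts at +64 */
    union gfx_uresp uresp;  /* +64 */
};                          /* total 96 bytes */

int main(void)
{
    static_assert(offsetof(struct gfx_resp, fw_addr_hi) == 12, "fw_addr_hi");
    static_assert(offsetof(struct gfx_resp, tmr_size) == 16, "tmr_size");
    static_assert(offsetof(struct gfx_resp, uresp) == 64, "uresp");
    static_assert(sizeof(struct gfx_resp) == 96, "total size");
    printf("layout ok\n");
    return 0;
}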
psp_v11_0_ring_init(struct psp_context *psp, struct amdgpu_device *adev = psp->adev; if ((!amdgpu_sriov_vf(adev)) && - (adev->asic_type != CHIP_SIENNA_CICHLID) && - (adev->asic_type != CHIP_NAVY_FLOUNDER)) + !(adev->asic_type >= CHIP_SIENNA_CICHLID && + adev->asic_type <= CHIP_DIMGREY_CAVEFISH)) psp_v11_0_reroute_ih(psp); ring = &psp->km_ring; @@ -615,7 +639,7 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) { struct psp_memory_training_context *ctx = &psp->mem_train_ctx; - uint32_t *pcache = (uint32_t*)ctx->sys_cache; + uint32_t *pcache = (uint32_t *)ctx->sys_cache; struct amdgpu_device *adev = psp->adev; uint32_t p2c_header[4]; uint32_t sz; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 5f304d61999e..eb5dc6c5b46e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -243,7 +243,9 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (VI). */ @@ -299,7 +301,9 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -590,6 +594,7 @@ error_free_wb: * sdma_v2_4_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. @@ -740,6 +745,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, /** * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * */ @@ -789,7 +795,8 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VI). @@ -1188,10 +1195,11 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: unused * * Copy GPU buffers using the DMA engine (VI). 
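Widening fw_name in psp_v11_0_init_microcode() from a bare 30-byte array to PSP_FW_NAME_LEN is not cosmetic: "amdgpu/dimgrey_cavefish_ta.bin" is already 30 characters, so with the terminating NUL it no longer fits the old buffer. A quick check of that; 64 below is only a placeholder, since the value of PSP_FW_NAME_LEN is not part of this hunk:

#include <stdio.h>

int main(void)
{
    char small[30];   /* the old fw_name[30] */
    char big[64];     /* stand-in for PSP_FW_NAME_LEN */
    const char *chip_name = "dimgrey_cavefish";
    int n;

    n = snprintf(small, sizeof(small), "amdgpu/%s_ta.bin", chip_name);
    printf("needed %d bytes, old buffer keeps \"%s\"\n", n, small);   /* truncated */

    n = snprintf(big, sizeof(big), "amdgpu/%s_ta.bin", chip_name);
    printf("needed %d bytes, new buffer keeps \"%s\"\n", n, big);
    return 0;
}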
* Used by the amdgpu ttm implementation to move pages if @@ -1216,7 +1224,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index c59f6f6f4c09..ad308d8c6d30 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -417,7 +417,9 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (VI). */ @@ -473,7 +475,9 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -862,6 +866,7 @@ error_free_wb: * sdma_v3_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. @@ -1011,6 +1016,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, /** * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * */ @@ -1060,7 +1066,8 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VI). @@ -1626,10 +1633,11 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: unused * * Copy GPU buffers using the DMA engine (VI). 
* Used by the amdgpu ttm implementation to move pages if @@ -1654,7 +1662,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e82f49f62f6e..ce56e93c6886 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -569,7 +569,7 @@ static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) break; } - memset((void*)adev->sdma.instance, 0, + memset((void *)adev->sdma.instance, 0, sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); } @@ -593,9 +593,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - if (amdgpu_sriov_vf(adev)) - return 0; - DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -643,8 +640,8 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type == CHIP_ARCTURUS) { /* Acturus will leverage the same FW memory for every SDMA instance */ - memcpy((void*)&adev->sdma.instance[i], - (void*)&adev->sdma.instance[0], + memcpy((void *)&adev->sdma.instance[i], + (void *)&adev->sdma.instance[0], sizeof(struct amdgpu_sdma_instance)); } else { @@ -837,7 +834,9 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (VEGA10). */ @@ -912,7 +911,9 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -1110,7 +1111,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) } } -/** +/* * sdma_v4_0_rb_cntl - get parameters for rb_cntl */ static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) @@ -1573,6 +1574,7 @@ error_free_wb: * sdma_v4_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VEGA10). * Returns 0 on success, error on failure. @@ -1669,10 +1671,9 @@ static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib, * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes - * @flags: access flags * * Update PTEs by writing them manually using sDMA (VEGA10). 
*/ @@ -1727,8 +1728,8 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, /** * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding - * */ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { @@ -1772,7 +1773,8 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VEGA10). @@ -2491,10 +2493,11 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used * * Copy GPU buffers using the DMA engine (VEGA10/12). * Used by the amdgpu ttm implementation to move pages if @@ -2520,7 +2523,7 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 9c72b95b7463..b208b81005bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -203,7 +203,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) const struct common_firmware_header *header = NULL; const struct sdma_firmware_header_v1_0 *hdr; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_NAVI12)) return 0; DRM_DEBUG("\n"); @@ -392,7 +392,9 @@ static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (NAVI10). */ @@ -469,7 +471,9 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -959,6 +963,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) * sdma_v5_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (NAVI10). * Returns 0 on success, error on failure. @@ -1061,10 +1066,9 @@ static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib, * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes - * @flags: access flags * * Update PTEs by writing them manually using sDMA (NAVI10). 
*/ @@ -1118,6 +1122,7 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, /** * sdma_v5_0_ring_pad_ib - pad the IB + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * * Pad the IB with NOPs to a boundary multiple of 8. @@ -1170,7 +1175,8 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (NAVI10). @@ -1686,10 +1692,11 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used * * Copy GPU buffers using the DMA engine (NAVI10). * Used by the amdgpu ttm implementation to move pages if @@ -1715,7 +1722,7 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 2a485052e3ab..39e17aae655f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -46,6 +46,9 @@ MODULE_FIRMWARE("amdgpu/sienna_cichlid_sdma.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_sdma.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sdma.bin"); + +MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA3_REG_OFFSET 0x400 @@ -87,6 +90,8 @@ static void sdma_v5_2_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: break; default: break; @@ -124,7 +129,7 @@ static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev) break; } - memset((void*)adev->sdma.instance, 0, + memset((void *)adev->sdma.instance, 0, sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); } @@ -148,9 +153,6 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - if (amdgpu_sriov_vf(adev)) - return 0; - DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -160,6 +162,12 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: chip_name = "navy_flounder"; break; + case CHIP_VANGOGH: + chip_name = "vangogh"; + break; + case CHIP_DIMGREY_CAVEFISH: + chip_name = "dimgrey_cavefish"; + break; default: BUG(); } @@ -175,10 +183,10 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) goto out; for (i = 1; i < adev->sdma.num_instances; i++) { - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) { - memcpy((void*)&adev->sdma.instance[i], - (void*)&adev->sdma.instance[0], + if (adev->asic_type >= CHIP_SIENNA_CICHLID && + adev->asic_type <= CHIP_DIMGREY_CAVEFISH) { + memcpy((void *)&adev->sdma.instance[i], + (void *)&adev->sdma.instance[0], sizeof(struct amdgpu_sdma_instance)); } else { 
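Several of the kernel-doc fixes above land on the *_ring_pad_ib() helpers, whose job, per the sdma_v5_0 comment, is to pad the indirect buffer with NOPs to a boundary multiple of 8 dwords. A toy version of that rounding; the NOP encoding, and emitting single NOPs rather than one counted NOP packet, are assumptions of the model:

#include <stdint.h>
#include <stdio.h>

#define SDMA_NOP 0x00000000u   /* hypothetical NOP encoding, for the model only */

struct ib {
    uint32_t ptr[64];
    unsigned int length_dw;
};

/* Round the packet stream up so its dword count is a multiple of 8. */
static void pad_ib(struct ib *ib)
{
    while (ib->length_dw & 7)
        ib->ptr[ib->length_dw++] = SDMA_NOP;
}

int main(void)
{
    struct ib ib = { .length_dw = 13 };

    pad_ib(&ib);
    printf("padded to %u dwords\n", ib.length_dw);   /* 16 */
    return 0;
}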
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); @@ -345,7 +353,9 @@ static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring. */ @@ -407,7 +417,9 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -696,7 +708,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) temp &= 0xFF0FFF; temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | (CACHE_WRITE_POLICY_L2__DEFAULT << 14) | - 0x01000000); + SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK); WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp); if (!amdgpu_sriov_vf(adev)) { @@ -905,6 +917,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) * sdma_v5_2_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring. * Returns 0 on success, error on failure. @@ -1006,10 +1019,9 @@ static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib, * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes - * @flags: access flags * * Update PTEs by writing them manually using sDMA. */ @@ -1065,6 +1077,7 @@ static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib, * sdma_v5_2_ring_pad_ib - pad the IB * * @ib: indirect buffer to fill with padding + * @ring: amdgpu_ring structure holding ring information * * Pad the IB with NOPs to a boundary multiple of 8. */ @@ -1116,7 +1129,8 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA. @@ -1169,8 +1183,12 @@ static int sdma_v5_2_early_init(void *handle) adev->sdma.num_instances = 4; break; case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: adev->sdma.num_instances = 2; break; + case CHIP_VANGOGH: + adev->sdma.num_instances = 1; + break; default: break; } @@ -1567,6 +1585,8 @@ static int sdma_v5_2_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: sdma_v5_2_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); sdma_v5_2_update_medium_grain_light_sleep(adev, @@ -1683,10 +1703,11 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used * * Copy GPU buffers using the DMA engine. 
* Used by the amdgpu ttm implementation to move pages if @@ -1712,7 +1733,7 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index e5e336fd9e94..3cf0589bfea5 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1350,7 +1350,7 @@ static void si_vga_set_state(struct amdgpu_device *adev, bool state) static u32 si_get_xclk(struct amdgpu_device *adev) { - u32 reference_clock = adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; u32 tmp; tmp = RREG32(CG_CLKPIN_CNTL_2); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 7d2bbcbe547b..488497ad5e0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -81,7 +81,9 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, * si_dma_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -244,6 +246,7 @@ error_free_wb: * si_dma_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. @@ -401,6 +404,7 @@ static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, /** * si_dma_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring pointer * @ib: indirect buffer to fill with padding * */ @@ -436,7 +440,8 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * si_dma_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VI). @@ -764,10 +769,11 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev) /** * si_dma_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: is this a secure operation * * Copy GPU buffers using the DMA engine (VI). 
* Used by the amdgpu ttm implementation to move pages if @@ -790,7 +796,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, /** * si_dma_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 621727d7fd18..51880f6ef634 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -43,7 +43,7 @@ static void si_ih_enable_interrupts(struct amdgpu_device *adev) WREG32(IH_RB_CNTL, ih_rb_cntl); adev->irq.ih.enabled = true; } - + static void si_ih_disable_interrupts(struct amdgpu_device *adev) { u32 ih_rb_cntl = RREG32(IH_RB_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 7fb240c4990c..5c7d769aee3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -212,6 +212,7 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) /** * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. * + * @control: I2C adapter reference * @address: The I2C address of the slave device. * @data: The data to transmit over the bus. * @numbytes: The amount of data to transmit. @@ -313,7 +314,9 @@ Err: /** * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device. * + * @control: I2C adapter reference * @address: The I2C address of the slave device. + * @data: Placeholder to store received data. * @numbytes: The amount of data to transmit. * @i2c_flag: Flags for transmission * diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c new file mode 100644 index 000000000000..e9c474c217ec --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -0,0 +1,77 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "smuio_v11_0.h" +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" + +static u32 smuio_v11_0_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); +} + +static u32 smuio_v11_0_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); +} + +static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable) +{ + u32 def, data; + + /* enable/disable ROM CG is not supported on APU */ + if (adev->flags & AMD_IS_APU) + return; + + def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); + else + data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; + + if (def != data) + WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); +} + +static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +{ + u32 data; + + /* CGTT_ROM_CLK_CTRL0 is not available for APU */ + if (adev->flags & AMD_IS_APU) + return; + + data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) + *flags |= AMD_CG_SUPPORT_ROM_MGCG; +} + +const struct amdgpu_smuio_funcs smuio_v11_0_funcs = { + .get_rom_index_offset = smuio_v11_0_get_rom_index_offset, + .get_rom_data_offset = smuio_v11_0_get_rom_data_offset, + .update_rom_clock_gating = smuio_v11_0_update_rom_clock_gating, + .get_clock_gating_state = smuio_v11_0_get_clock_gating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h new file mode 100644 index 000000000000..43c4262f2b8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V11_0_H__ +#define __SMUIO_V11_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v11_0_funcs; + +#endif /* __SMUIO_V11_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c new file mode 100644 index 000000000000..8417890af227 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -0,0 +1,77 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v9_0.h" +#include "smuio/smuio_9_0_offset.h" +#include "smuio/smuio_9_0_sh_mask.h" + +static u32 smuio_v9_0_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); +} + +static u32 smuio_v9_0_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); +} + +static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable) +{ + u32 def, data; + + /* enable/disable ROM CG is not supported on APU */ + if (adev->flags & AMD_IS_APU) + return; + + def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); + else + data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; + + if (def != data) + WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); +} + +static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +{ + u32 data; + + /* CGTT_ROM_CLK_CTRL0 is not availabe for APUs */ + if (adev->flags & AMD_IS_APU) + return; + + data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) + *flags |= AMD_CG_SUPPORT_ROM_MGCG; +} + +const struct amdgpu_smuio_funcs smuio_v9_0_funcs = { + .get_rom_index_offset = smuio_v9_0_get_rom_index_offset, + .get_rom_data_offset = smuio_v9_0_get_rom_data_offset, + .update_rom_clock_gating = smuio_v9_0_update_rom_clock_gating, + .get_clock_gating_state = smuio_v9_0_get_clock_gating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h new file mode 100644 index 000000000000..fc265ce9837d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V9_0_H__ +#define __SMUIO_V9_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v9_0_funcs; + +#endif /* __SMUIO_V9_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index f57c5f57efa8..8a23636ecc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -42,8 +42,6 @@ #include "sdma1/sdma1_4_0_offset.h" #include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_sh_mask.h" -#include "smuio/smuio_9_0_offset.h" -#include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" #include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" @@ -62,6 +60,7 @@ #include "nbio_v7_0.h" #include "nbio_v7_4.h" #include "vega10_ih.h" +#include "navi10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" #include "vce_v4_0.h" @@ -70,6 +69,8 @@ #include "jpeg_v2_0.h" #include "vcn_v2_5.h" #include "jpeg_v2_5.h" +#include "smuio_v9_0.h" +#include "smuio_v11_0.h" #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" @@ -90,12 +91,6 @@ #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L #define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 -/* for Vega20/arcturus regiter offset change */ -#define mmROM_INDEX_VG20 0x00e4 -#define mmROM_INDEX_VG20_BASE_IDX 0 -#define mmROM_DATA_VG20 0x00e5 -#define mmROM_DATA_VG20_BASE_IDX 0 - /* * Indirect registers accessor */ @@ -295,17 +290,10 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, dw_ptr = (u32 *)bios; length_dw = ALIGN(length_bytes, 4) / 4; - switch (adev->asic_type) { - case CHIP_VEGA20: - case CHIP_ARCTURUS: - rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX_VG20); - rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA_VG20); - break; - default: - rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); - rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); - break; - } + rom_index_offset = + adev->smuio.funcs->get_rom_index_offset(adev); + rom_data_offset = + adev->smuio.funcs->get_rom_data_offset(adev); /* set rom index to 0 */ WREG32(rom_index_offset, 0); @@ -717,6 +705,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) else adev->df.funcs = &df_v1_7_funcs; + if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) + adev->smuio.funcs = &smuio_v11_0_funcs; + else + adev->smuio.funcs = &smuio_v9_0_funcs; + adev->rev_id = soc15_get_rev_id(adev); switch 
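With the smuio_v9_0/v11_0 helpers above, soc15_read_bios_from_rom() no longer needs per-ASIC defines for the ROM index/data registers; it asks the smuio callbacks for the offsets and then streams the image through the data port. A compressed model of that flow; the offsets, the fake ROM contents and the data-port auto-increment are illustrative assumptions, and the actual read loop is not shown in this hunk:

#include <stdint.h>
#include <stdio.h>

struct smuio_funcs {
    uint32_t (*get_rom_index_offset)(void);
    uint32_t (*get_rom_data_offset)(void);
};

/* Pretend MMIO space: index register at 0xe4, data register at 0xe5,
 * backed by a tiny fake ROM. */
static uint32_t fake_rom[8] = { 0xaa55aa55, 1, 2, 3, 4, 5, 6, 7 };
static uint32_t rom_index;

static uint32_t idx_offset(void)  { return 0xe4; }
static uint32_t data_offset(void) { return 0xe5; }

static const struct smuio_funcs smuio = { idx_offset, data_offset };

static void wreg32(uint32_t off, uint32_t v)
{
    if (off == smuio.get_rom_index_offset())
        rom_index = v;
}

static uint32_t rreg32(uint32_t off)
{
    /* reading the data register returns the selected dword and advances
     * the index; the auto-increment is an assumption about the hardware */
    if (off == smuio.get_rom_data_offset())
        return fake_rom[rom_index++];
    return 0;
}

static void read_bios(uint32_t *dst, unsigned int length_dw)
{
    unsigned int i;

    wreg32(smuio.get_rom_index_offset(), 0);   /* set rom index to 0 */
    for (i = 0; i < length_dw; i++)
        dst[i] = rreg32(smuio.get_rom_data_offset());
}

int main(void)
{
    uint32_t buf[4];

    read_bios(buf, 4);
    printf("first dword 0x%08x\n", buf[0]);
    return 0;
}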
(adev->asic_type) { @@ -734,9 +728,15 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) else amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); } - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + else + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); } else { - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + else + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { if (adev->asic_type == CHIP_VEGA20) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); @@ -787,9 +787,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); } else { - amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); } @@ -822,7 +822,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) else if (amdgpu_device_has_dc_support(adev)) - amdgpu_device_ip_block_add(adev, &dm_ip_block); + amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); @@ -1169,7 +1169,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | @@ -1187,7 +1186,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | @@ -1211,7 +1209,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_DRM_MGCG | AMD_CG_SUPPORT_DRM_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | @@ -1264,7 +1261,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_IH_CG | @@ -1504,24 +1500,6 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data); } -static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) - data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | - CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); - else - data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | - CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; - - if (def != data) - WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data); -} - static int soc15_common_set_clockgating_state(void *handle, enum amd_clockgating_state state) { 
@@ -1544,7 +1522,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, state == AMD_CG_STATE_GATE); - soc15_update_rom_medium_grain_clock_gating(adev, + adev->smuio.funcs->update_rom_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->df.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -1561,8 +1539,6 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, state == AMD_CG_STATE_GATE); - soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE); break; case CHIP_ARCTURUS: soc15_update_hdp_light_sleep(adev, @@ -1600,9 +1576,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_DRM_LS; /* AMD_CG_SUPPORT_ROM_MGCG */ - data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); - if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) - *flags |= AMD_CG_SUPPORT_ROM_MGCG; + adev->smuio.funcs->get_clock_gating_state(adev, flags); adev->df.funcs->get_clockgating_state(adev, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index e40140bf6699..ce3319993b4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -181,6 +181,7 @@ static void tonga_ih_irq_disable(struct amdgpu_device *adev) * tonga_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VI). Also check for @@ -215,6 +216,8 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, * tonga_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -247,6 +250,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, * tonga_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. 
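After this change the common clock-gating paths treat ROM MGCG like any other per-IP feature: soc15_common_set_clockgating_state() calls adev->smuio.funcs->update_rom_clock_gating(), and the state query simply lets each IP callback OR its enabled features into the caller's mask. In miniature, with stand-in bit values and register reads:

#include <stdint.h>
#include <stdio.h>

#define CG_SUPPORT_ROM_MGCG (1u << 0)   /* illustrative bit assignments */
#define CG_SUPPORT_DF_MGCG  (1u << 1)

/* Each IP block reports only the gating features it currently has enabled,
 * OR-ing them into a mask owned by the caller. */
static void smuio_get_cg_state(uint32_t *flags)
{
    int rom_cg_enabled = 1;             /* would come from CGTT_ROM_CLK_CTRL0 */

    if (rom_cg_enabled)
        *flags |= CG_SUPPORT_ROM_MGCG;
}

static void df_get_cg_state(uint32_t *flags)
{
    (void)flags;                        /* DF gating left disabled in this example */
}

int main(void)
{
    uint32_t flags = 0;

    smuio_get_cg_state(&flags);
    df_get_cg_state(&flags);
    printf("cg flags 0x%08x\n", flags);
    return 0;
}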
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 5288617ca552..96d7769609f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -253,7 +253,7 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; uint32_t umc_inst = 0; uint32_t ch_inst = 0; @@ -368,7 +368,7 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; uint32_t umc_inst = 0; uint32_t ch_inst = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index 5665c77a9d58..a064c097690c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -170,7 +170,7 @@ static void umc_v8_7_querry_uncorrectable_error_count(struct amdgpu_device *adev static void umc_v8_7_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; uint32_t umc_inst = 0; uint32_t ch_inst = 0; @@ -260,7 +260,7 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, static void umc_v8_7_query_ras_error_address(struct amdgpu_device *adev, void *ras_error_status) { - struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; uint32_t umc_inst = 0; uint32_t ch_inst = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 41800fcad410..10ecae257b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -80,7 +80,9 @@ static void uvd_v3_1_ring_set_wptr(struct amdgpu_ring *ring) * uvd_v3_1_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: iob associated with the indirect buffer * @ib: indirect buffer to execute + * @flags: flags associated with the indirect buffer * * Write ring commands to execute the indirect buffer */ @@ -99,7 +101,9 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring, * uvd_v3_1_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. 
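Most of the remaining hunks in the UVD/VCN/IH files are comment-only: they bring the kernel-doc headers in line with the real parameter lists so that kernel-doc and W=1 builds stop warning about undocumented or stale arguments. Assembled, a fixed header simply documents every argument of the prototype it sits on; the exact flags type below is illustrative, matching how the fence callback is declared elsewhere in amdgpu:

/**
 * uvd_v3_1_ring_emit_fence - emit a fence & trap command
 *
 * @ring: amdgpu_ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Write a fence and a trap command to the ring.
 */
static void uvd_v3_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned int flags)

The umc_v6_1 and umc_v8_7 hunks are the same kind of cleanup for declarations, moving the '*' next to the variable name as checkpatch prefers.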
*/ @@ -619,7 +623,7 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev, /** * uvd_v3_1_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -686,7 +690,7 @@ done: /** * uvd_v3_1_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index b0c0c438fc93..a70d2a0de316 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -149,7 +149,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, /** * uvd_v4_2_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -204,7 +204,7 @@ done: /** * uvd_v4_2_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ @@ -437,7 +437,9 @@ static void uvd_v4_2_stop(struct amdgpu_device *adev) * uvd_v4_2_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -502,7 +504,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * uvd_v4_2_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: iob associated with the indirect buffer * @ib: indirect buffer to execute + * @flags: flags associated with the indirect buffer * * Write ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 6e57001f6d0a..f3b0a927101b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -145,7 +145,7 @@ static int uvd_v5_0_sw_fini(void *handle) /** * uvd_v5_0_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -202,7 +202,7 @@ done: /** * uvd_v5_0_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ @@ -454,7 +454,9 @@ static void uvd_v5_0_stop(struct amdgpu_device *adev) * uvd_v5_0_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. 
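The @adev to @handle renames reflect how these hooks are actually typed: hw_init()/hw_fini() are struct amd_ip_funcs callbacks and receive an opaque void *handle, which the implementation casts back to the device, exactly as the vcn_v3_0 code later in this diff does. A compressed, purely illustrative sketch of the pattern (the uvd_vX_0 name is a placeholder):

static int uvd_vX_0_hw_init(void *handle)
{
	/* every amd_ip_funcs hook gets the opaque handle, not the device */
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* ... bring up the block using adev ... */
	return 0;
}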
*/ @@ -518,7 +520,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * uvd_v5_0_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 666bfa4a0b8e..760859880c1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -198,9 +198,9 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) /** * uvd_v6_0_enc_get_create_msg - generate a UVD ENC create msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test @@ -261,9 +261,9 @@ err: /** * uvd_v6_0_enc_get_destroy_msg - generate a UVD ENC destroy msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Close up a stream for HW test or if userspace failed to do so @@ -326,6 +326,7 @@ err: * uvd_v6_0_enc_ring_test_ib - test if UVD ENC IBs are working * * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * */ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) @@ -464,7 +465,7 @@ static int uvd_v6_0_sw_fini(void *handle) /** * uvd_v6_0_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -533,7 +534,7 @@ done: /** * uvd_v6_0_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ @@ -891,7 +892,9 @@ static void uvd_v6_0_stop(struct amdgpu_device *adev) * uvd_v6_0_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -921,7 +924,9 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq * uvd_v6_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. 
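The new @timeout documentation matches how the ENC IB tests consume the value: it is handed straight to the fence wait, so callers pass jiffies or MAX_SCHEDULE_TIMEOUT to wait indefinitely. Roughly, assuming the usual dma_fence based wait at the end of such a test (needs <linux/dma-fence.h>; helper name is illustrative):

static long wait_test_fence(struct dma_fence *fence, long timeout)
{
	/* 0 means the wait timed out, >0 is the remaining jiffies */
	long r = dma_fence_wait_timeout(fence, false, timeout);

	if (r == 0)
		return -ETIMEDOUT;
	return r < 0 ? r : 0;	/* propagate errors, 0 on success */
}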
*/ @@ -986,7 +991,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * uvd_v6_0_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1012,7 +1019,9 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * uvd_v6_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrive vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b44c8677ce8d..312ecf6d24a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -206,9 +206,9 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) /** * uvd_v7_0_enc_get_create_msg - generate a UVD ENC create msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test @@ -269,9 +269,9 @@ err: /** * uvd_v7_0_enc_get_destroy_msg - generate a UVD ENC destroy msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Close up a stream for HW test or if userspace failed to do so @@ -333,6 +333,7 @@ err: * uvd_v7_0_enc_ring_test_ib - test if UVD ENC IBs are working * * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * */ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) @@ -519,7 +520,7 @@ static int uvd_v7_0_sw_fini(void *handle) /** * uvd_v7_0_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -597,7 +598,7 @@ done: /** * uvd_v7_0_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ @@ -1147,7 +1148,9 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev) * uvd_v7_0_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -1186,7 +1189,9 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq * uvd_v7_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. 
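Likewise, @job in the emit_ib callbacks exists so the implementation can pull the VMID for the IB packet, and @flags is unused on these legacy rings. The vcn_v3_0 software-ring code later in this diff shows the pattern with AMDGPU_JOB_GET_VMID(); reduced to its core it is roughly the following (illustrative placeholder name, and real implementations also emit the per-dword register headers):

static void uvd_vX_0_ring_emit_ib(struct amdgpu_ring *ring,
				  struct amdgpu_job *job,
				  struct amdgpu_ib *ib,
				  uint32_t flags)
{
	/* resolves to 0 when no job (and thus no VM) is attached */
	unsigned int vmid = AMDGPU_JOB_GET_VMID(job);

	amdgpu_ring_write(ring, vmid);
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
}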
*/ @@ -1282,7 +1287,9 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, * uvd_v7_0_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1313,7 +1320,9 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, * uvd_v7_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrive vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c new file mode 100644 index 000000000000..d64d681a05dc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vangogh_reg_init.c @@ -0,0 +1,50 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "vangogh_ip_offset.h" + +void vangogh_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the blocke needed by driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCN_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 86e1ef732ebe..c734e31a9e65 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -431,7 +431,6 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) * vcn_v1_0_disable_clock_gating - disable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Disable clock gating for VCN block */ @@ -558,7 +557,6 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) * vcn_v1_0_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Enable clock gating for VCN block */ @@ -1445,7 +1443,9 @@ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring) * vcn_v1_0_dec_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -1484,7 +1484,9 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1619,7 +1621,9 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring) * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. 
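vangogh_reg_base_init() only fills adev->reg_offset[] with the per-IP instance base tables; the SOC15-style access macros then add the per-register offset from the IP headers on top of that lookup. A rough sketch of how such a lookup consumes the table (the macro body here is paraphrased under an assumed name, not quoted from the headers):

/* paraphrased: IP base segment from the table + register offset from headers */
#define SOC15_REG(ip, inst, reg) \
	(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)

	/* e.g. reading a GC register on instance 0 of a Vangogh part */
	uint32_t val = RREG32(SOC15_REG(GC, 0, mmGRBM_STATUS));

This is why the new file only needs to wire up the IP blocks the driver touches: anything without a reg_offset entry is simply never addressed through these macros.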
*/ @@ -1644,7 +1648,9 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrive vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e5d29dee0c88..d63198c945bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -45,7 +45,7 @@ #define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x1e1 -#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x5a6 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x5a6 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x5a7 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x1e2 @@ -475,7 +475,6 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec * vcn_v2_0_disable_clock_gating - disable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Disable clock gating for VCN block */ @@ -636,7 +635,6 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, * vcn_v2_0_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Enable clock gating for VCN block */ @@ -800,7 +798,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.inst->dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst->dpg_sram_cpu_addr; + adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr; /* enable clock gating */ vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); @@ -1397,6 +1395,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) * vcn_v2_0_dec_ring_insert_nop - insert a nop command * * @ring: amdgpu_ring pointer + * @count: the number of NOP packets to insert * * Write a nop command to the ring. */ @@ -1417,7 +1416,9 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -1454,7 +1455,9 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1600,7 +1603,9 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. 
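The (uint32_t *) cast cleanups above touch the DPG "indirect" programming path: when indirect is set, the VCN register writes are not issued over MMIO but appended as offset/value pairs into the per-instance dpg_sram image that is later handed to the firmware to program. The following is an assumption about what the WREG32_SOC15_DPG_MODE() staging amounts to, kept deliberately simple; the cast fix itself just keeps the pointer arithmetic in 32-bit units:

	if (indirect) {
		/* stage the write into the SRAM image instead of issuing MMIO */
		*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = reg_offset;
		*adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = reg_value;
	} else {
		/* direct path: a normal WREG32_SOC15() of the same register */
	}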
*/ @@ -1625,7 +1630,9 @@ void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrive vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 0f1d3ef8baa7..b6e0f4ba6272 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -45,7 +45,7 @@ #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 -#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c #define VCN25_MAX_HW_INSTANCES_ARCTURUS 2 @@ -777,7 +777,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; /* enable clock gating */ vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index b5f8f3d731cb..def583916294 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -44,10 +44,11 @@ #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4 -#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 +#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c -#define VCN_INSTANCES_SIENNA_CICHLID 2 +#define VCN_INSTANCES_SIENNA_CICHLID 2 +#define DEC_SW_RING_ENABLED FALSE static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -55,8 +56,8 @@ static int amdgpu_ih_clientid_vcns[] = { }; static int amdgpu_ucode_id_vcns[] = { - AMDGPU_UCODE_ID_VCN, - AMDGPU_UCODE_ID_VCN1 + AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1 }; static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); @@ -155,6 +156,13 @@ static int vcn_v3_0_sw_init(void *handle) if (r) return r; + /* + * Note: doorbell assignment is fixed for SRIOV multiple VCN engines + * Formula: + * vcn_db_base = adev->doorbell_index.vcn.vcn_ring0_1 << 1; + * dec_ring_i = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + * enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j + */ if (amdgpu_sriov_vf(adev)) { vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1; /* get DWORD offset */ @@ -162,6 +170,7 @@ static int vcn_v3_0_sw_init(void *handle) } for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -192,9 +201,7 @@ static int vcn_v3_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_dec; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) { - ring->doorbell_index = vcn_doorbell_index; - /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ - vcn_doorbell_index++; + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1); } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } @@ -216,9 +223,7 @@ static int vcn_v3_0_sw_init(void *handle) ring = 
&adev->vcn.inst[i].ring_enc[j]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) { - ring->doorbell_index = vcn_doorbell_index; - /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ - vcn_doorbell_index++; + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j; } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } @@ -230,6 +235,11 @@ static int vcn_v3_0_sw_init(void *handle) if (r) return r; } + + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) | + cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); + fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); } if (amdgpu_sriov_vf(adev)) { @@ -253,7 +263,17 @@ static int vcn_v3_0_sw_init(void *handle) static int vcn_v3_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sw_ring.is_enabled = false; + } if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); @@ -291,17 +311,19 @@ static int vcn_v3_0_hw_init(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - ring->wptr = 0; - ring->wptr_old = 0; - vcn_v3_0_dec_ring_set_wptr(ring); - ring->sched.ready = true; + if (ring->sched.ready) { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_dec_ring_set_wptr(ring); + } for (j = 0; j < adev->vcn.num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; - ring->wptr = 0; - ring->wptr_old = 0; - vcn_v3_0_enc_ring_set_wptr(ring); - ring->sched.ready = true; + if (ring->sched.ready) { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_enc_ring_set_wptr(ring); + } } } } else { @@ -461,6 +483,15 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); } static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) @@ -543,13 +574,16 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* non-cache window */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, 
SOC15_DPG_MODE_OFFSET( - VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) @@ -902,6 +936,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -915,7 +950,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp); if (indirect) - adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; /* enable clock gating */ vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); @@ -1015,6 +1050,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); /* set the write pointer delay */ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); @@ -1038,6 +1074,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); @@ -1047,6 +1084,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo static int vcn_v3_0_start(struct amdgpu_device *adev) { + volatile struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; int i, j, k, r; @@ -1189,6 +1227,9 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + /* programm the RB_BASE for ring buffer */ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); @@ -1201,19 +1242,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[i].ring_enc[0]; WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = 
&adev->vcn.inst[i].ring_enc[1]; WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); } return 0; @@ -1236,12 +1283,12 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) uint32_t table_size; uint32_t size, size_dw; + bool is_vcn_ready; + struct mmsch_v3_0_cmd_direct_write direct_wt = { {0} }; struct mmsch_v3_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} }; - struct mmsch_v3_0_cmd_direct_polling - direct_poll = { {0} }; struct mmsch_v3_0_cmd_end end = { {0} }; struct mmsch_v3_0_init_header header; @@ -1249,8 +1296,6 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) MMSCH_COMMAND__DIRECT_REG_WRITE; direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; - direct_poll.cmd_header.command_type = - MMSCH_COMMAND__DIRECT_REG_POLLING; end.cmd_header.command_type = MMSCH_COMMAND__END; @@ -1373,14 +1418,14 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) MMSCH_V3_0_INSERT_END(); /* refine header */ - header.inst[i].init_status = 1; + header.inst[i].init_status = 0; header.inst[i].table_offset = header.total_size; header.inst[i].table_size = table_size; header.total_size += table_size; } /* Update init table header in memory */ - size = sizeof(struct mmsch_v3_0_init_header); + size = sizeof(struct mmsch_v3_0_init_header); table_loc = (uint32_t *)table->cpu_addr; memcpy((void *)table_loc, &header, size); @@ -1431,6 +1476,30 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) } } + /* 6, check each VCN's init_status + * if it remains as 0, then this VCN is not assigned to current VF + * do not start ring for this VCN + */ + size = sizeof(struct mmsch_v3_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy(&header, (void *)table_loc, size); + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + is_vcn_ready = (header.inst[i].init_status == 1); + if (!is_vcn_ready) + DRM_INFO("VCN(%d) engine is disabled by hypervisor\n", i); + + ring = &adev->vcn.inst[i].ring_dec; + ring->sched.ready = is_vcn_ready; + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->sched.ready = is_vcn_ready; + } + } + return 0; } @@ -1540,6 +1609,7 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev) static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state) { + volatile struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t reg_data = 0; int ret_code; @@ -1571,6 +1641,8 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); /* Restore */ + fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[inst_idx].ring_enc[0]; ring->wptr = 0; WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); @@ -1578,7 +1650,9 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, 
inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[inst_idx].ring_enc[1]; ring->wptr = 0; WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -1586,6 +1660,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); /* Unstall DPG */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), @@ -1655,6 +1730,98 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) } } +static void vcn_v3_0_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, uint32_t flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE); + amdgpu_ring_write(ring, addr); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP); +} + +static void vcn_v3_0_dec_sw_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END); +} + +static void vcn_v3_0_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + uint32_t vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB); + amdgpu_ring_write(ring, vmid); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void vcn_v3_0_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v3_0_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint32_t vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v3_0_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v3_0_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0x3f, + .nop = VCN_DEC_SW_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = vcn_v3_0_dec_ring_get_rptr, + .get_wptr = vcn_v3_0_dec_ring_get_wptr, + .set_wptr = vcn_v3_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v3_0_dec_sw_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v3_0_dec_sw_ring_emit_fdec_swe x2 vm fdec_swe */ + 1, /* vcn_v3_0_dec_sw_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v3_0_dec_sw_ring_emit_ib */ + .emit_ib = vcn_v3_0_dec_sw_ring_emit_ib, + .emit_fence = 
vcn_v3_0_dec_sw_ring_emit_fence, + .emit_vm_flush = vcn_v3_0_dec_sw_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_dec_sw_ring_test_ring, + .test_ib = NULL,//amdgpu_vcn_dec_sw_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v3_0_dec_sw_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v3_0_dec_sw_ring_emit_wreg, + .emit_reg_wait = vcn_v3_0_dec_sw_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, @@ -1792,9 +1959,13 @@ static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs; + if (!DEC_SW_RING_ENABLED) + adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs; + else + adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; - DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i, + DEC_SW_RING_ENABLED?"(Software Ring)":""); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 407c6093c2ec..e5ae31eb744e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -91,6 +91,9 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) } adev->irq.ih2.enabled = true; } + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; } /** @@ -366,6 +369,7 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev) * vega10_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VEGA10). Also check for @@ -430,6 +434,8 @@ out: * vega10_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -473,6 +479,7 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, * vega10_ih_irq_rearm - rearm IRQ if lost * * @adev: amdgpu_device pointer + * @ih: IH ring to match * */ static void vega10_ih_irq_rearm(struct amdgpu_device *adev, @@ -505,6 +512,7 @@ static void vega10_ih_irq_rearm(struct amdgpu_device *adev, * vega10_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. */ @@ -606,6 +614,10 @@ static int vega10_ih_sw_init(void *handle) adev->irq.ih2.use_doorbell = true; adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + r = amdgpu_irq_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 9bcd0eebc6d7..d56b474b3a21 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1645,6 +1645,7 @@ static int vi_common_set_clockgating_state(void *handle, case CHIP_POLARIS12: case CHIP_VEGAM: vi_common_set_clockgating_state_by_smu(adev, state); + break; default: break; } |
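The final hunk adds the break that was missing after the SMU clockgating call in vi.c. Falling through into the default label happens to be harmless today, since default only breaks, but it reads as an accident, trips implicit-fallthrough style checking, and would silently change behaviour if another case were ever added between them. The fixed shape of the switch is simply:

	switch (adev->asic_type) {
	/* ... other CHIP_* cases elided ... */
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		vi_common_set_clockgating_state_by_smu(adev, state);
		break;		/* previously fell through into default */
	default:
		break;
	}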