Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
100 files changed, 5588 insertions, 1728 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 74a8105fd2c0..7777d55275de 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -4,7 +4,7 @@ config DRM_AMDGPU_SI depends on DRM_AMDGPU help Choose this option if you want to enable experimental support - for SI asics. + for SI (Southern Islands) asics. SI is already supported in radeon. Experimental support for SI in amdgpu will be disabled by default and is still provided by @@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" depends on DRM_AMDGPU help - Choose this option if you want to enable support for CIK asics. + Choose this option if you want to enable support for CIK (Sea + Islands) asics. CIK is already supported in radeon. Support for CIK in amdgpu will be disabled by default and is still provided by radeon. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 3e0e2eb7e235..5a283d12f8e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -88,11 +88,12 @@ amdgpu-y += \ gmc_v8_0.o \ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \ - mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o + mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \ + mmhub_v3_0_1.o # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o # add IH block amdgpu-y += \ @@ -114,7 +115,8 @@ amdgpu-y += \ psp_v11_0.o \ psp_v11_0_8.o \ psp_v12_0.o \ - psp_v13_0.o + psp_v13_0.o \ + psp_v13_0_4.o # add DCE block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index c6cc493a5486..2b97b8a96fb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -148,30 +148,22 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; - struct list_head reset_device_list; int r = 0; dev_dbg(adev->dev, "aldebaran perform hw reset\n"); + + if (reset_device_list == NULL) + return -EINVAL; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && reset_context->hive == NULL) { /* Wrong context, return error */ return -EINVAL; } - INIT_LIST_HEAD(&reset_device_list); - if (reset_context->hive) { - list_for_each_entry (tmp_adev, - &reset_context->hive->device_list, - gmc.xgmi.head) - list_add_tail(&tmp_adev->reset_list, - &reset_device_list); - } else { - list_add_tail(&reset_context->reset_req_dev->reset_list, - &reset_device_list); - } - - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { mutex_lock(&tmp_adev->reset_cntl->reset_lock); tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2; } @@ -179,7 +171,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch * them together so that they can be completed asynchronously on multiple nodes */ - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { /* For 
XGMI run all resets in parallel to speed up the process */ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { if (!queue_work(system_unbound_wq, @@ -197,7 +189,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, /* For XGMI wait for all resets to complete before proceed */ if (!r) { - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { flush_work(&tmp_adev->reset_cntl->reset_work); r = tmp_adev->asic_reset_res; @@ -207,7 +199,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, } } - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { mutex_unlock(&tmp_adev->reset_cntl->reset_lock); tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE; } @@ -339,10 +331,13 @@ static int aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { + struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; - struct list_head reset_device_list; int r; + if (reset_device_list == NULL) + return -EINVAL; + if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && reset_context->hive == NULL) { @@ -350,19 +345,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, return -EINVAL; } - INIT_LIST_HEAD(&reset_device_list); - if (reset_context->hive) { - list_for_each_entry (tmp_adev, - &reset_context->hive->device_list, - gmc.xgmi.head) - list_add_tail(&tmp_adev->reset_list, - &reset_device_list); - } else { - list_add_tail(&reset_context->reset_req_dev->reset_list, - &reset_device_list); - } - - list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { + list_for_each_entry(tmp_adev, reset_device_list, reset_list) { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); r = aldebaran_mode2_restore_ip(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 30ce6bb6fa77..d597e2656c47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; +extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; @@ -223,6 +224,9 @@ static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; static const bool __maybe_unused debug_evictions; /* = false */ static const bool __maybe_unused no_system_mem_limit; #endif +#ifdef CONFIG_HSA_AMD_P2P +extern bool pcie_p2p; +#endif extern int amdgpu_tmz; extern int amdgpu_reset_method; @@ -274,7 +278,7 @@ extern int amdgpu_vcnfw_log; #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 -/* smasrt shift bias level limits */ +/* smart shift bias level limits */ #define AMDGPU_SMARTSHIFT_MAX_BIAS (100) #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) @@ -313,7 +317,7 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; - +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 @@ -667,6 +671,7 @@ enum amd_hw_ip_block_type { RSMU_HWIP, XGMI_HWIP, DCI_HWIP, + 
PCIE_HWIP, MAX_HWIP }; @@ -1007,7 +1012,6 @@ struct amdgpu_device { uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; /* enable runtime pm on the device */ - bool runpm; bool in_runpm; bool has_pr3; @@ -1016,7 +1020,7 @@ struct amdgpu_device { bool psp_sysfs_en; /* Chip product information */ - char product_number[16]; + char product_number[20]; char product_name[AMDGPU_PRODUCT_NAME_LEN]; char serial[20]; @@ -1044,10 +1048,18 @@ struct amdgpu_device { /* reset dump register */ uint32_t *reset_dump_reg_list; + uint32_t *reset_dump_reg_value; int num_regs; +#ifdef CONFIG_DEV_COREDUMP + struct amdgpu_task_info reset_task_info; + bool reset_vram_lost; + struct timespec64 reset_time; +#endif bool scpm_enabled; uint32_t scpm_status; + + struct work_struct reset_work; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1241,9 +1253,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job* job); -int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, - struct amdgpu_job *job); + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index cc9c9f8b23b2..6d72355ac492 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -29,6 +29,8 @@ #include <linux/platform_device.h> #include <sound/designware_i2s.h> #include <sound/pcm.h> +#include <linux/acpi.h> +#include <linux/dmi.h> #include "amdgpu.h" #include "atom.h" @@ -36,6 +38,7 @@ #include "acp_gfx_if.h" +#define ST_JADEITE 1 #define ACP_TILE_ON_MASK 0x03 #define ACP_TILE_OFF_MASK 0x02 #define ACP_TILE_ON_RETAIN_REG_MASK 0x1f @@ -85,6 +88,8 @@ #define ACP_DEVS 4 #define ACP_SRC_ID 162 +static unsigned long acp_machine_id; + enum { ACP_TILE_P1 = 0, ACP_TILE_P2, @@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to POWER GATE ACP block * smu will * 1. turn off the acp clock * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; } @@ -147,16 +150,14 @@ static int acp_poweron(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to UNGATE ACP block * smu will * 1. exit ulv * 2. turn on acp clock * 3. 
power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); return 0; } @@ -184,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data) return 0; } +static int acp_quirk_cb(const struct dmi_system_id *id) +{ + acp_machine_id = ST_JADEITE; + return 1; +} + +static const struct dmi_system_id acp_quirk_table[] = { + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"), + } + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"), + }, + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"), + }, + }, + {} +}; + /** * acp_hw_init - start and test ACP block * @@ -193,7 +225,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data) static int acp_hw_init(void *handle) { int r; - uint64_t acp_base; + u64 acp_base; u32 val = 0; u32 count = 0; struct i2s_platform_data *i2s_pdata = NULL; @@ -220,141 +252,202 @@ static int acp_hw_init(void *handle) return -EINVAL; acp_base = adev->rmmio_base; - - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) + if (!adev->acp.acp_genpd) return -ENOMEM; adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; adev->acp.acp_genpd->gpd.power_off = acp_poweroff; adev->acp.acp_genpd->gpd.power_on = acp_poweron; - - adev->acp.acp_genpd->adev = adev; pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + dmi_check_system(acp_quirk_table); + switch (acp_machine_id) { + case ST_JADEITE: + { + adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell), + GFP_KERNEL); + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), - GFP_KERNEL); - - if (adev->acp.acp_cell == NULL) { - r = -ENOMEM; - goto failure; - } - - adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); - if (adev->acp.acp_res == NULL) { - r = -ENOMEM; - goto failure; - } + adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } - i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); - if (i2s_pdata == NULL) { - r = -ENOMEM; - goto failure; - } + i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } - switch (adev->asic_type) { - case CHIP_STONEY: i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dma_irq"; + adev->acp.acp_res[2].flags = IORESOURCE_IRQ; + 
adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[2].end = adev->acp.acp_res[2].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 3; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2); + if (r) + goto failure; + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; break; - default: - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; } - i2s_pdata[0].cap = DWC_I2S_PLAY; - i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; - i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1 | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; default: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; - } + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), + GFP_KERNEL); - i2s_pdata[1].cap = DWC_I2S_RECORD; - i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; - i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; - default: - break; - } + adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } + + i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } + + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } + i2s_pdata[0].cap = DWC_I2S_PLAY; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } + + i2s_pdata[1].cap = DWC_I2S_RECORD; + i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } - i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; - i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; - 
i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; - - adev->acp.acp_res[0].name = "acp2x_dma"; - adev->acp.acp_res[0].flags = IORESOURCE_MEM; - adev->acp.acp_res[0].start = acp_base; - adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; - - adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; - adev->acp.acp_res[1].flags = IORESOURCE_MEM; - adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; - adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; - - adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; - adev->acp.acp_res[2].flags = IORESOURCE_MEM; - adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; - adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; - - adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; - adev->acp.acp_res[3].flags = IORESOURCE_MEM; - adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; - adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; - - adev->acp.acp_res[4].name = "acp2x_dma_irq"; - adev->acp.acp_res[4].flags = IORESOURCE_IRQ; - adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); - adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; - - adev->acp.acp_cell[0].name = "acp_audio_dma"; - adev->acp.acp_cell[0].num_resources = 5; - adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; - adev->acp.acp_cell[0].platform_data = &adev->asic_type; - adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); - - adev->acp.acp_cell[1].name = "designware-i2s"; - adev->acp.acp_cell[1].num_resources = 1; - adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; - adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; - adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[2].name = "designware-i2s"; - adev->acp.acp_cell[2].num_resources = 1; - adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; - adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; - adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[3].name = "designware-i2s"; - adev->acp.acp_cell[3].num_resources = 1; - adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; - adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; - adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); - - r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, - ACP_DEVS); - if (r) - goto failure; - - r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, - acp_genpd_add_device); - if (r) - goto failure; + i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; + i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; + adev->acp.acp_res[2].flags = IORESOURCE_MEM; + adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; + adev->acp.acp_res[3].flags = IORESOURCE_MEM; + adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; + 
adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; + + adev->acp.acp_res[4].name = "acp2x_dma_irq"; + adev->acp.acp_res[4].flags = IORESOURCE_IRQ; + adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 5; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[2].name = "designware-i2s"; + adev->acp.acp_cell[2].num_resources = 1; + adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; + adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; + adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[3].name = "designware-i2s"; + adev->acp.acp_cell[3].num_resources = 1; + adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; + adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; + adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); + + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); + if (r) + goto failure; + + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; + } /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -546,8 +639,7 @@ static const struct amd_ip_funcs acp_ip_funcs = { .set_powergating_state = acp_set_powergating_state, }; -const struct amdgpu_ip_block_version acp_ip_block = -{ +const struct amdgpu_ip_block_version acp_ip_block = { .type = AMD_IP_BLOCK_TYPE_ACP, .major = 2, .minor = 2, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 98ac53ee6bb5..130060834b4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -66,9 +66,7 @@ struct amdgpu_atif { struct amdgpu_atif_notifications notifications; struct amdgpu_atif_functions functions; struct amdgpu_atif_notification_cfg notification_cfg; -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) struct backlight_device *bd; -#endif struct amdgpu_dm_backlight_caps backlight_caps; }; @@ -436,7 +434,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count); if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) { -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if (atif->bd) { DRM_DEBUG_DRIVER("Changing brightness to %d\n", req.backlight_level); @@ -447,7 +444,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, */ backlight_device_set_brightness(atif->bd, req.backlight_level); } -#endif } if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { @@ -849,7 +845,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) { struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) if (atif->notifications.brightness_change) { if (amdgpu_device_has_dc_support(adev)) { #if defined(CONFIG_DRM_AMD_DC) @@ -876,7 +871,6 @@ int 
amdgpu_acpi_init(struct amdgpu_device *adev) } } } -#endif adev->acpi_nb.notifier_call = amdgpu_acpi_event; register_acpi_notifier(&adev->acpi_nb); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3b1c675aba34..5e53a5293935 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -33,6 +33,7 @@ #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_ras.h" #include "amdgpu_umc.h" +#include "amdgpu_reset.h" /* Total memory size in system memory and all GPU VRAM. Used to * estimate worst case amount of memory to reserve for page tables @@ -122,6 +123,22 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, } } + +static void amdgpu_amdkfd_reset_work(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + kfd.reset_work); + + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; @@ -180,6 +197,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), &gpu_resources); + + INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work); } } @@ -247,7 +266,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) { if (amdgpu_device_should_recover_gpu(adev)) - amdgpu_device_gpu_recover(adev, NULL); + amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->kfd.reset_work); } int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, @@ -671,6 +691,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, goto err_ib_sched; } + /* Drop the initial kref_init count (see drm_sched_main as example) */ + dma_fence_put(f); ret = dma_fence_wait(f, false); err_ib_sched: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f8b9f27adcf5..647220a8762d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -48,6 +48,7 @@ enum kfd_mem_attachment_type { KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */ KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */ KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */ + KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */ }; struct kfd_mem_attachment { @@ -95,7 +96,9 @@ struct amdgpu_amdkfd_fence { struct amdgpu_kfd_dev { struct kfd_dev *dev; uint64_t vram_used; + uint64_t vram_used_aligned; bool init_complete; + struct work_struct reset_work; }; enum kgd_engine_type { @@ -170,6 +173,9 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, struct svm_range_bo *svm_bo); +#if defined(CONFIG_DEBUG_FS) +int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); +#endif #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); @@ -266,6 +272,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device 
*adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, @@ -279,10 +286,11 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem, void **kptr, uint64_t *size); -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, + void **kptr, uint64_t *size); +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); + +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); @@ -301,6 +309,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem * void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 alloc_flag); +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 alloc_flag); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); @@ -332,7 +344,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) } #endif /* KGD2KFD callbacks */ -int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger); int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 1d0dbff87d3f..469785d33791 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -159,11 +159,14 @@ static void amdkfd_fence_release(struct dma_fence *f) } /** - * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f - * if same return TRUE else return FALSE. + * amdkfd_fence_check_mm - Check whether to prevent eviction of @f by @mm * * @f: [IN] fence * @mm: [IN] mm that needs to be verified + * + * Check if @mm is same as that of the fence @f, if same return TRUE else + * return FALSE. + * For svm bo, which support vram overcommitment, always return FALSE. 
*/ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) { @@ -171,7 +174,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) if (!fence) return false; - else if (fence->mm == mm) + else if (fence->mm == mm && !fence->svm_bo) return true; return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6b6d46e29e6e..cbd593f7d553 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -32,12 +32,19 @@ #include "amdgpu_dma_buf.h" #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_xgmi.h" +#include "kfd_smi_events.h" /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +/* + * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB + * BO chunk + */ +#define VRAM_AVAILABLITY_ALIGN (1 << 21) + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; @@ -108,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) -static size_t amdgpu_amdkfd_acc_size(uint64_t size) -{ - size >>= PAGE_SHIFT; - size *= sizeof(dma_addr_t) + sizeof(void *); - - return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) + - __roundup_pow_of_two(sizeof(struct ttm_tt)) + - PAGE_ALIGN(size); -} +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) /** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size - * of buffer including any reserved for control structures + * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. This should be @@ -131,33 +129,32 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size) * * Return: returns -ENOMEM in case of error, ZERO otherwise */ -static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); - size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; - acc_size = amdgpu_amdkfd_acc_size(size); - + system_mem_needed = 0; + ttm_mem_needed = 0; vram_needed = 0; if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size + size; + system_mem_needed = size; + ttm_mem_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; + /* + * Conservatively round up the allocation requirement to 2 MB + * to avoid fragmentation caused by 4K allocations in the tail + * 2M BO chunk. 
+ */ vram_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - } else { + system_mem_needed = size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); return -ENOMEM; } @@ -172,8 +169,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || - (adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - reserved_for_pt)) { + (adev && adev->kfd.vram_used + vram_needed > + adev->gmc.real_vram_size - + atomic64_read(&adev->vram_pin_size) - + reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -181,7 +180,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, /* Update memory accounting by decreasing available system * memory, TTM memory and GPU memory as computed above */ - adev->kfd.vram_used += vram_needed; + WARN_ONCE(vram_needed && !adev, + "adev reference can't be null when vram is used"); + if (adev) { + adev->kfd.vram_used += vram_needed; + adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); + } kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; @@ -190,36 +194,30 @@ release: return ret; } -static void unreserve_mem_limit(struct amdgpu_device *adev, +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { - size_t acc_size; - - acc_size = amdgpu_amdkfd_acc_size(size); - spin_lock(&kfd_mem_limit.mem_limit_lock); if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= (acc_size + size); + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - adev->kfd.vram_used -= size; + WARN_ONCE(!adev, + "adev reference can't be null when alloc mem flags vram is set"); + if (adev) { + adev->kfd.vram_used -= size; + adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); + } } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + kfd_mem_limit.system_mem_used -= size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } - - WARN_ONCE(adev->kfd.vram_used < 0, + WARN_ONCE(adev && adev->kfd.vram_used < 0, "KFD VRAM memory accounting unbalanced"); WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, "KFD TTM memory accounting unbalanced"); @@ -236,11 +234,47 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) u32 alloc_flags = bo->kfd_bo->alloc_flags; u64 size = amdgpu_bo_size(bo); - unreserve_mem_limit(adev, size, alloc_flags); + 
amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); kfree(bo->kfd_bo); } +/** + * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information + * about USERPTR or DOOREBELL or MMIO BO. + * @adev: Device for which dmamap BO is being created + * @mem: BO of peer device that is being DMA mapped. Provides parameters + * in building the dmamap BO + * @bo_out: Output parameter updated with handle of dmamap BO + */ +static int +create_dmamap_sg_bo(struct amdgpu_device *adev, + struct kgd_mem *mem, struct amdgpu_bo **bo_out) +{ + struct drm_gem_object *gem_obj; + int ret, align; + + ret = amdgpu_bo_reserve(mem->bo, false); + if (ret) + return ret; + + align = 1; + ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align, + AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE, + ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); + + amdgpu_bo_unreserve(mem->bo); + + if (ret) { + pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret); + return -EINVAL; + } + + *bo_out = gem_to_amdgpu_bo(gem_obj); + (*bo_out)->parent = amdgpu_bo_ref(mem->bo); + return ret; +} + /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's * reservation object. * @@ -350,22 +384,8 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return ret; } - ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd); - if (ret) { - pr_err("failed to validate PD\n"); - return ret; - } - vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo); - if (vm->use_cpu_for_update) { - ret = amdgpu_bo_kmap(pd, NULL); - if (ret) { - pr_err("failed to kmap PD, ret=%d\n", ret); - return ret; - } - } - return 0; } @@ -399,45 +419,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; case CHIP_ALDEBARAN: - if (coherent && uncached) { - if (adev->gmc.xgmi.connected_to_cpu || - !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (adev->asic_type == CHIP_ALDEBARAN && + adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; snoop = true; - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: - mapping_flags |= coherent ? 
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) + snoop = true; } pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); @@ -446,6 +463,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) return pte_flags; } +/** + * create_sg_table() - Create an sg_table for a contiguous DMA addr range + * @addr: The starting address to point to + * @size: Size of memory area in bytes being pointed to + * + * Allocates an instance of sg_table and initializes it to point to memory + * area specified by input parameters. The address used to build is assumed + * to be DMA mapped, if needed. + * + * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table + * because they are physically contiguous. + * + * Return: Initialized instance of SG Table or NULL + */ +static struct sg_table *create_sg_table(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg_dma_address(sg->sgl) = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int kfd_mem_dmamap_userptr(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -510,6 +559,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } +/** + * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device + * @attachment: Virtual address attachment of the BO on accessing device + * + * An access request from the device that owns DOORBELL does not require DMA mapping. + * This is because the request doesn't go through PCIe root complex i.e. it instead + * loops back. The need to DMA map arises only when accessing peer device's DOORBELL + * + * In contrast, all access requests for MMIO need to be DMA mapped without regard to + * device ownership. This is because access requests for MMIO go through PCIe root + * complex. + * + * This is accomplished in two steps: + * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used + * in updating requesting device's page table + * - Signal TTM to mark memory pointed to by requesting device's BO as GPU + * accessible. 
This allows an update of requesting device's page table + * with entries associated with DOOREBELL or MMIO memory + * + * This method is invoked in the following contexts: + * - Mapping of DOORBELL or MMIO BO of same or peer device + * - Validating an evicted DOOREBELL or MMIO BO on device seeking access + * + * Return: ZERO if successful, NON-ZERO otherwise + */ +static int +kfd_mem_dmamap_sg_bo(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *ttm = bo->tbo.ttm; + enum dma_data_direction dir; + dma_addr_t dma_addr; + bool mmio; + int ret; + + /* Expect SG Table of dmapmap BO to be NULL */ + mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP); + if (unlikely(ttm->sg)) { + pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio); + return -EINVAL; + } + + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + dma_addr = mem->bo->tbo.sg->sgl->dma_address; + pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length); + pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr); + dma_addr = dma_map_resource(adev->dev, dma_addr, + mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); + ret = dma_mapping_error(adev->dev, dma_addr); + if (unlikely(ret)) + return ret; + pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr); + + ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length); + if (unlikely(!ttm->sg)) { + ret = -ENOMEM; + goto unmap_sg; + } + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (unlikely(ret)) + goto free_sg; + + return ret; + +free_sg: + sg_free_table(ttm->sg); + kfree(ttm->sg); + ttm->sg = NULL; +unmap_sg: + dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length, + dir, DMA_ATTR_SKIP_CPU_SYNC); + return ret; +} + static int kfd_mem_dmamap_attachment(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -521,6 +651,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem, return kfd_mem_dmamap_userptr(mem, attachment); case KFD_MEM_ATT_DMABUF: return kfd_mem_dmamap_dmabuf(attachment); + case KFD_MEM_ATT_SG: + return kfd_mem_dmamap_sg_bo(mem, attachment); default: WARN_ON_ONCE(1); } @@ -561,6 +693,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); } +/** + * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device + * @attachment: Virtual address attachment of the BO on accessing device + * + * The method performs following steps: + * - Signal TTM to mark memory pointed to by BO as GPU inaccessible + * - Free SG Table that is used to encapsulate DMA mapped memory of + * peer device's DOORBELL or MMIO memory + * + * This method is invoked in the following contexts: + * UNMapping of DOORBELL or MMIO BO on a device having access to its memory + * Eviction of DOOREBELL or MMIO BO on device having access to its memory + * + * Return: void + */ +static void +kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, + struct kfd_mem_attachment *attachment) +{ + struct ttm_operation_ctx ctx = {.interruptible = true}; + struct amdgpu_bo *bo = attachment->bo_va->base.bo; + struct amdgpu_device *adev = attachment->adev; + struct ttm_tt *ttm = 
bo->tbo.ttm; + enum dma_data_direction dir; + + if (unlikely(!ttm->sg)) { + pr_err("SG Table of BO is UNEXPECTEDLY NULL"); + return; + } + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + + dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address, + ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC); + sg_free_table(ttm->sg); + kfree(ttm->sg); + ttm->sg = NULL; + bo->tbo.sg = NULL; +} + static void kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -574,39 +750,15 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, case KFD_MEM_ATT_DMABUF: kfd_mem_dmaunmap_dmabuf(attachment); break; + case KFD_MEM_ATT_SG: + kfd_mem_dmaunmap_sg_bo(mem, attachment); + break; default: WARN_ON_ONCE(1); } } static int -kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem, - struct amdgpu_bo **bo) -{ - unsigned long bo_size = mem->bo->tbo.base.size; - struct drm_gem_object *gobj; - int ret; - - ret = amdgpu_bo_reserve(mem->bo, false); - if (ret) - return ret; - - ret = amdgpu_gem_object_create(adev, bo_size, 1, - AMDGPU_GEM_DOMAIN_CPU, - AMDGPU_GEM_CREATE_PREEMPTIBLE, - ttm_bo_type_sg, mem->bo->tbo.base.resv, - &gobj); - amdgpu_bo_unreserve(mem->bo); - if (ret) - return ret; - - *bo = gem_to_amdgpu_bo(gobj); - (*bo)->parent = amdgpu_bo_ref(mem->bo); - - return 0; -} - -static int kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, struct amdgpu_bo **bo) { @@ -630,7 +782,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem, *bo = gem_to_amdgpu_bo(gobj); (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE; - (*bo)->parent = amdgpu_bo_ref(mem->bo); return 0; } @@ -656,6 +807,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + bool same_hive = false; int i, ret; if (!va) { @@ -663,6 +815,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, return -EINVAL; } + /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices + * + * The access path of MMIO and DOORBELL BOs of is always over PCIe. + * In contrast the access path of VRAM BOs depens upon the type of + * link that connects the peer device. Access over PCIe is allowed + * if peer device has large BAR. 
In contrast, access over xGMI is + * allowed for both small and large BAR configurations of peer device + */ + if ((adev != bo_adev) && + ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || + (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { + if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) + same_hive = amdgpu_xgmi_same_hive(adev, bo_adev); + if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev)) + return -EINVAL; + } + for (i = 0; i <= is_aql; i++) { attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL); if (unlikely(!attachment[i])) { @@ -673,9 +843,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, va + bo_size, vm); - if (adev == bo_adev || - (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || - (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) { + if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || + (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the * local hive, or userptr mapping IOMMU direct map mode * share the original BO @@ -691,26 +861,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { /* Create an SG BO to DMA-map userptrs on other GPUs */ attachment[i]->type = KFD_MEM_ATT_USERPTR; - ret = kfd_mem_attach_userptr(adev, mem, &bo[i]); + ret = create_dmamap_sg_bo(adev, mem, &bo[i]); if (ret) goto unwind; - } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT && - mem->bo->tbo.type != ttm_bo_type_sg) { - /* GTT BOs use DMA-mapping ability of dynamic-attach - * DMA bufs. TODO: The same should work for VRAM on - * large-BAR GPUs. 
- */ + /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */ + } else if (mem->bo->tbo.type == ttm_bo_type_sg) { + WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL || + mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP), + "Handing invalid SG BO in ATTACH request"); + attachment[i]->type = KFD_MEM_ATT_SG; + ret = create_dmamap_sg_bo(adev, mem, &bo[i]); + if (ret) + goto unwind; + /* Enable acces to GTT and VRAM BOs of peer devices */ + } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT || + mem->domain == AMDGPU_GEM_DOMAIN_VRAM) { attachment[i]->type = KFD_MEM_ATT_DMABUF; ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); if (ret) goto unwind; + pr_debug("Employ DMABUF mechanism to enable peer GPU access\n"); } else { - /* FIXME: Need to DMA-map other BO types: - * large-BAR VRAM, doorbells, MMIO remap - */ - attachment[i]->type = KFD_MEM_ATT_SHARED; - bo[i] = mem->bo; - drm_gem_object_get(&bo[i]->tbo.base); + WARN_ONCE(true, "Handling invalid ATTACH request"); + ret = -EINVAL; + goto unwind; } /* Add BO to VM internal data structures */ @@ -1111,24 +1285,6 @@ update_gpuvm_pte_failed: return ret; } -static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) -{ - struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); - - if (!sg) - return NULL; - if (sg_alloc_table(sg, 1, GFP_KERNEL)) { - kfree(sg); - return NULL; - } - sg->sgl->dma_address = addr; - sg->sgl->length = size; -#ifdef CONFIG_NEED_SG_DMA_LENGTH - sg->sgl->dma_length = size; -#endif - return sg; -} - static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -1364,16 +1520,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.bo; if (!process_info) return; - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - /* Update process info */ mutex_lock(&process_info->lock); process_info->n_vms--; @@ -1457,6 +1607,21 @@ out_unlock: return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available; + spin_lock(&kfd_mem_limit.mem_limit_lock); + available = adev->gmc.real_vram_size + - adev->kfd.vram_used_aligned + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + + return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); +} + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, @@ -1497,7 +1662,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bo_type = ttm_bo_type_sg; if (size > UINT_MAX) return -EINVAL; - sg = create_doorbell_sg(*offset, size); + sg = create_sg_table(*offset, size); if (!sg) return -ENOMEM; } else { @@ -1591,7 +1756,7 @@ err_node_allow: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_mem_limit(adev, size, flags); + amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags); err_reserve_limit: mutex_destroy(&(*mem)->lock); if (gobj) @@ -1612,6 +1777,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = mem->bo->tbo.base.size; + bool use_release_notifier = (mem->bo->kfd_bo == mem); struct kfd_mem_attachment *entry, *tmp; struct 
bo_vm_reservation_context ctx; struct ttm_validate_buffer *bo_list_entry; @@ -1703,6 +1869,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( */ drm_gem_object_put(&mem->bo->tbo.base); + /* + * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(), + * explicitly free it here. + */ + if (!use_release_notifier) + kfree(mem); + return ret; } @@ -1907,8 +2080,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory( return ret; } -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem, void **kptr, uint64_t *size) +/** + * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count + * @adev: Device to which allocated BO belongs + * @bo: Buffer object to be mapped + * + * Before return, bo reference count is incremented. To release the reference and unpin/ + * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. + */ +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + int ret; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("Failed to reserve bo. ret %d\n", ret); + goto err_reserve_bo_failed; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (ret) { + pr_err("Failed to pin bo. ret %d\n", ret); + goto err_pin_bo_failed; + } + + ret = amdgpu_ttm_alloc_gart(&bo->tbo); + if (ret) { + pr_err("Failed to bind bo to GART. ret %d\n", ret); + goto err_map_bo_gart_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, bo->kfd_bo->process_info->eviction_fence); + + amdgpu_bo_unreserve(bo); + + bo = amdgpu_bo_ref(bo); + + return 0; + +err_map_bo_gart_failed: + amdgpu_bo_unpin(bo); +err_pin_bo_failed: + amdgpu_bo_unreserve(bo); +err_reserve_bo_failed: + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access + * + * @mem: Buffer object to be mapped for CPU access + * @kptr[out]: pointer in kernel CPU address space + * @size[out]: size of the buffer + * + * Pins the BO and maps it for kernel CPU access. The eviction fence is removed + * from the BO, since pinned BOs cannot be evicted. The bo must remain on the + * validate_list, so the GPU mapping can be restored after a page table was + * evicted. + * + * Return: 0 on success, error code on failure + */ +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, + void **kptr, uint64_t *size) { int ret; struct amdgpu_bo *bo = mem->bo; @@ -1959,8 +2193,15 @@ bo_reserve_failed: return ret; } -void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev, - struct kgd_mem *mem) +/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Unmap a GTT BO for kernel CPU access + * + * @mem: Buffer object to be unmapped for CPU access + * + * Removes the kernel CPU mapping and unpins the BO. It does not restore the + * eviction fence, so this function should only be used for cleanup before the + * BO is destroyed. 
+ */ +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; @@ -2072,7 +2313,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->restore_userptr_work, @@ -2346,13 +2587,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) unlock_out: mutex_unlock(&process_info->lock); - mmput(mm); - put_task_struct(usertask); /* If validation failed, reschedule another attempt */ - if (evicted_bos) + if (evicted_bos) { schedule_delayed_work(&process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + + kfd_smi_event_queue_restore_rescheduled(mm); + } + mmput(mm); + put_task_struct(usertask); } /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given @@ -2654,3 +2898,22 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem * } return false; } + +#if defined(CONFIG_DEBUG_FS) + +int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data) +{ + + spin_lock(&kfd_mem_limit.mem_limit_lock); + seq_printf(m, "System mem used %lldM out of %lluM\n", + (kfd_mem_limit.system_mem_used >> 20), + (kfd_mem_limit.max_system_mem_limit >> 20)); + seq_printf(m, "TTM mem used %lldM out of %lluM\n", + (kfd_mem_limit.ttm_mem_used >> 20), + (kfd_mem_limit.max_ttm_mem_limit >> 20)); + spin_unlock(&kfd_mem_limit.mem_limit_lock); + + return 0; +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index fd8f3731758e..b81b77a9efa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, mem_channel_number = vram_info->v30.channel_num; mem_channel_width = vram_info->v30.channel_width; if (vram_width) - *vram_width = mem_channel_number * mem_channel_width; + *vram_width = mem_channel_number * (1 << mem_channel_width); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6..2168163aad2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf..9caea1688fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. 
+ */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e..b7bae833c804 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ out_free_user_pages: kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -1278,6 +1283,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 7dc92ef36b2b..8ee4e8491f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -110,7 +110,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp, return -EACCES; } -static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio) +static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio) { switch (prio) { case AMDGPU_CTX_PRIORITY_HIGH: @@ -143,8 +143,9 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip) ctx->init_priority : ctx->override_priority; switch (hw_ip) { + case AMDGPU_HW_IP_GFX: case AMDGPU_HW_IP_COMPUTE: - hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio); + hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio); break; case AMDGPU_HW_IP_VCE: case AMDGPU_HW_IP_VCN_ENC: @@ -271,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) return res; } -static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, - struct drm_file *filp, struct amdgpu_ctx *ctx) -{ - int r; - - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - - kref_init(&ctx->refcount); - ctx->mgr = mgr; - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; - - return 0; -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -325,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, return 0; } +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) +{ + u32 current_stable_pstate; + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + kref_init(&ctx->refcount); + ctx->mgr = 
mgr; + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + + r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); + if (r) + return r; + + ctx->stable_pstate = current_stable_pstate; + + return 0; +} + static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { @@ -396,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(&adev->ddev, &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); + amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); drm_dev_exit(idx); } @@ -779,7 +786,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx, amdgpu_ctx_to_drm_sched_prio(priority)); /* set hw priority */ - if (hw_ip == AMDGPU_HW_IP_COMPUTE) { + if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) { hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip); hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX); scheds = adev->gpu_sched[hw_ip][hw_prio].sched; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index eedb12f6b8a3..cb00c7d6f50b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -383,12 +383,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, value = RREG32_PCIE(*pos); r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; result += 4; buf += 4; @@ -396,11 +392,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -441,12 +438,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; WREG32_PCIE(*pos, value); @@ -456,11 +449,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -502,12 +496,8 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, value = RREG32_DIDT(*pos >> 2); r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; result += 4; buf += 4; @@ -515,11 +505,12 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - 
amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -560,12 +551,8 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; WREG32_DIDT(*pos >> 2, value); @@ -575,11 +562,12 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -621,12 +609,8 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, value = RREG32_SMC(*pos); r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; result += 4; buf += 4; @@ -634,11 +618,12 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -679,12 +664,8 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return r; - } + if (r) + goto out; WREG32_SMC(*pos, value); @@ -694,11 +675,12 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - amdgpu_virt_disable_access_debugfs(adev); - return result; + return r; } /** @@ -1090,11 +1072,8 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu uint32_t value; r = get_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } + if (r) + goto out; amdgpu_gfx_off_ctrl(adev, value ? 
true : false); @@ -1104,10 +1083,12 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return result; + return r; } @@ -1136,21 +1117,52 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, } while (size) { - uint32_t value; + u32 value = adev->gfx.gfx_off_state; + + r = put_user(value, (u32 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; r = amdgpu_get_gfx_off_status(adev, &value); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } + if (r) + goto out; - r = put_user(value, (uint32_t *)buf); - if (r) { - pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); - pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return r; - } + r = put_user(value, (u32 *)buf); + if (r) + goto out; result += 4; buf += 4; @@ -1158,10 +1170,12 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, size -= 4; } + r = result; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - return result; + return r; } static const struct file_operations amdgpu_debugfs_regs2_fops = { @@ -1229,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_status_read, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1240,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_wave_fops, &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, + &amdgpu_debugfs_gfxoff_status_fops, }; static const char *debugfs_regs_names[] = { @@ -1253,6 +1274,7 @@ static const char *debugfs_regs_names[] = { "amdgpu_wave", "amdgpu_gpr", "amdgpu_gfxoff", + "amdgpu_gfxoff_status", }; /** @@ -1683,7 +1705,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; char reg_offset[11]; - uint32_t *new, *tmp = NULL; + uint32_t *new = NULL, *tmp = NULL; int ret, i = 0, len = 0; do { @@ -1709,17 +1731,25 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, i++; } while (len < size); + new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto error_free; + } ret = down_write_killable(&adev->reset_domain->sem); if (ret) goto error_free; swap(adev->reset_dump_reg_list, tmp); + swap(adev->reset_dump_reg_value, new); adev->num_regs = i; up_write(&adev->reset_domain->sem); ret = size; error_free: - kfree(tmp); + if (tmp != new) + kfree(tmp); + kfree(new); 
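/*
 * Editor's sketch, not part of the patch above: the hunk in
 * amdgpu_reset_dump_register_list_write() allocates the new arrays first,
 * swaps the pointers while holding adev->reset_domain->sem for write, and
 * frees the old arrays only after dropping the lock.  A minimal userspace
 * analogue of that "allocate, swap under lock, free old" pattern, using
 * hypothetical names, looks like this:
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_rwlock_t list_lock = PTHREAD_RWLOCK_INITIALIZER;
static uint32_t *reg_list;	/* protected by list_lock */
static size_t reg_count;	/* protected by list_lock */

static int replace_reg_list(const uint32_t *regs, size_t count)
{
	uint32_t *new_list, *old_list;

	/* Allocate and fill the replacement before taking the lock. */
	new_list = malloc(count * sizeof(*new_list));
	if (!new_list)
		return -1;
	memcpy(new_list, regs, count * sizeof(*new_list));

	/* Swap the pointer under the write lock so readers never see a
	 * half-updated list. */
	pthread_rwlock_wrlock(&list_lock);
	old_list = reg_list;
	reg_list = new_list;
	reg_count = count;
	pthread_rwlock_unlock(&list_lock);

	/* The old array is private now; free it outside the lock. */
	free(old_list);
	return 0;
}

int main(void)
{
	uint32_t regs[] = { 0x1000, 0x1004, 0x1008 };

	if (replace_reg_list(regs, 3))
		return 1;
	printf("registered %zu offsets\n", reg_count);
	return 0;
}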
return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 58df107e3beb..1400abee9f40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,6 +32,9 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> +#include <linux/devcoredump.h> +#include <generated/utsrelease.h> +#include <linux/pci-p2pdma.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_probe_helper.h> @@ -1942,35 +1945,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } switch (adev->asic_type) { -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_VERDE: - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_OLAND: - case CHIP_HAINAN: -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KAVERI: - case CHIP_KABINI: - case CHIP_MULLINS: -#endif - case CHIP_TOPAZ: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_VEGA20: - case CHIP_ALDEBARAN: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: default: return 0; case CHIP_VEGA10: @@ -2482,12 +2456,14 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (!hive->reset_domain || !amdgpu_reset_get_reset_domain(hive->reset_domain)) { r = -ENOENT; + amdgpu_put_xgmi_hive(hive); goto init_failed; } /* Drop the early temporary reset domain we created for device */ amdgpu_reset_put_reset_domain(adev->reset_domain); adev->reset_domain = hive->reset_domain; + amdgpu_put_xgmi_hive(hive); } } @@ -3316,38 +3292,12 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_MULLINS: /* * We have systems in the wild with these ASICs that require - * LVDS and VGA support which is not supported with DC. + * VGA support which is not supported with DC. * * Fallback to the non-DC driver here by default so as not to * cause regressions. 
*/ return amdgpu_dc > 0; - case CHIP_HAWAII: - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: -#if defined(CONFIG_DRM_AMD_DC_DCN) - case CHIP_RAVEN: - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - case CHIP_RENOIR: - case CHIP_CYAN_SKILLFISH: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_BEIGE_GOBY: - case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: -#endif default: return amdgpu_dc != 0; #else @@ -3369,7 +3319,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) */ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev) || + if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display || (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) return false; @@ -3667,14 +3617,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_mcbp) DRM_INFO("MCBP is enabled\n"); - if (adev->asic_type >= CHIP_NAVI10) { - if (amdgpu_mes || amdgpu_mes_kiq) - adev->enable_mes = true; - - if (amdgpu_mes_kiq) - adev->enable_mes_kiq = true; - } - /* * Reset domain needs to be present early, before XGMI hive discovered * (if any) and intitialized to use reset sem and in_gpu reset flag @@ -4473,8 +4415,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); - amdgpu_amdkfd_pre_reset(adev); - if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else @@ -4666,6 +4606,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_virt_fini_data_exchange(adev); } + amdgpu_fence_driver_isr_toggle(adev, true); + /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -4681,6 +4623,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, amdgpu_fence_driver_force_completion(ring); } + amdgpu_fence_driver_isr_toggle(adev, false); + if (job && job->vm) drm_sched_increase_karma(&job->base); @@ -4721,20 +4665,72 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) { - uint32_t reg_value; int i; lockdep_assert_held(&adev->reset_domain->sem); - dump_stack(); for (i = 0; i < adev->num_regs; i++) { - reg_value = RREG32(adev->reset_dump_reg_list[i]); - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value); + adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); + trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], + adev->reset_dump_reg_value[i]); } return 0; } +#ifdef CONFIG_DEV_COREDUMP +static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_device *adev = data; + struct drm_print_iterator iter; + int i; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec); + if (adev->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + adev->reset_task_info.process_name, + adev->reset_task_info.pid); + + if (adev->reset_vram_lost) + 
drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (adev->num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < adev->num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + adev->reset_dump_reg_list[i], + adev->reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ +} + +static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev) +{ + struct drm_device *dev = adev_to_drm(adev); + + ktime_get_ts64(&adev->reset_time); + dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif + int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { @@ -4746,6 +4742,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); amdgpu_reset_reg_dumps(tmp_adev); + + reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ if (r == -ENOSYS) @@ -4819,6 +4817,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, goto out; vram_lost = amdgpu_device_check_vram_lost(tmp_adev); +#ifdef CONFIG_DEV_COREDUMP + tmp_adev->reset_vram_lost = vram_lost; + memset(&tmp_adev->reset_task_info, 0, + sizeof(tmp_adev->reset_task_info)); + if (reset_context->job && reset_context->job->vm) + tmp_adev->reset_task_info = + reset_context->job->vm->task_info; + amdgpu_reset_capture_coredumpm(tmp_adev); +#endif if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); amdgpu_inc_vram_lost(tmp_adev); @@ -5004,16 +5011,32 @@ static void amdgpu_device_recheck_guilty_jobs( /* clear job's guilty and depend the folowing step to decide the real one */ drm_sched_reset_karma(s_job); - /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get - * to make sure fence is balanced */ - dma_fence_get(s_job->s_fence->parent); drm_sched_resubmit_jobs_ext(&ring->sched, 1); + if (!s_job->s_fence->parent) { + DRM_WARN("Failed to get a HW fence for job!"); + continue; + } + ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); if (ret == 0) { /* timeout */ DRM_ERROR("Found the real bad job! 
ring:%s, job_id:%llx\n", ring->sched.name, s_job->id); + + amdgpu_fence_driver_isr_toggle(adev, true); + + /* Clear this failed job from fence array */ + amdgpu_fence_driver_clear_job_fences(ring); + + amdgpu_fence_driver_isr_toggle(adev, false); + + /* Since the job won't signal and we go for + * another resubmit drop this parent pointer + */ + dma_fence_put(s_job->s_fence->parent); + s_job->s_fence->parent = NULL; + /* set guilty */ drm_sched_increase_karma(s_job); retry: @@ -5042,7 +5065,6 @@ retry: /* got the hw fence, signal finished fence */ atomic_dec(ring->sched.score); - dma_fence_put(s_job->s_fence->parent); dma_fence_get(&s_job->s_fence->finished); dma_fence_signal(&s_job->s_fence->finished); dma_fence_put(&s_job->s_fence->finished); @@ -5055,8 +5077,29 @@ retry: } } +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + +#if defined(CONFIG_DEBUG_FS) + if (!amdgpu_sriov_vf(adev)) + cancel_work(&adev->reset_work); +#endif + + if (adev->kfd.dev) + cancel_work(&adev->kfd.reset_work); + + if (amdgpu_sriov_vf(adev)) + cancel_work(&adev->virt.flr_work); + + if (con && adev->ras_enabled) + cancel_work(&con->recovery_work); + +} + + /** - * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler + * amdgpu_device_gpu_recover - reset the asic and recover scheduler * * @adev: amdgpu_device pointer * @job: which job trigger hang @@ -5066,8 +5109,9 @@ retry: * Returns 0 for success or an error on failure. */ -int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, - struct amdgpu_job *job) +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context) { struct list_head device_list, *device_list_handle = NULL; bool job_signaled = false; @@ -5077,9 +5121,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, bool need_emergency_restart = false; bool audio_suspended = false; int tmp_vram_lost_counter; - struct amdgpu_reset_context reset_context; - - memset(&reset_context, 0, sizeof(reset_context)); /* * Special case: RAS triggered and full reset isn't supported @@ -5105,12 +5146,8 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, if (hive) mutex_lock(&hive->hive_lock); - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - reset_context.job = job; - reset_context.hive = hive; - clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - + reset_context->job = job; + reset_context->hive = hive; /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list @@ -5194,8 +5231,7 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && job->base.s_fence->parent && - dma_fence_is_signaled(job->base.s_fence->parent)) { + if (job && dma_fence_is_signaled(&job->hw_fence)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; @@ -5203,13 +5239,19 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. 
*/ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context); + r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", r, adev_to_drm(tmp_adev)->unique); tmp_adev->asic_reset_res = r; } + + /* + * Drop all pending non scheduler resets. Scheduler resets + * were already dropped during drm_sched_stop + */ + amdgpu_device_stop_pending_resets(tmp_adev); } tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter)); @@ -5224,7 +5266,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) amdgpu_ras_resume(adev); } else { - r = amdgpu_do_asic_reset(device_list_handle, &reset_context); + r = amdgpu_do_asic_reset(device_list_handle, reset_context); if (r && r == -EAGAIN) goto retry; } @@ -5244,7 +5286,7 @@ skip_hw_reset: if (amdgpu_gpu_recovery == 2 && !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter))) amdgpu_device_recheck_guilty_jobs( - tmp_adev, device_list_handle, &reset_context); + tmp_adev, device_list_handle, reset_context); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; @@ -5259,6 +5301,9 @@ skip_hw_reset: drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); } + if (adev->enable_mes) + amdgpu_mes_self_test(tmp_adev); + if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } @@ -5308,38 +5353,9 @@ skip_sched_resume: if (r) dev_info(adev->dev, "GPU reset end with ret = %d\n", r); - return r; -} - -struct amdgpu_recover_work_struct { - struct work_struct base; - struct amdgpu_device *adev; - struct amdgpu_job *job; - int ret; -}; - -static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work) -{ - struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base); - - recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job); -} -/* - * Serialize gpu recover into reset domain single threaded wq - */ -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job) -{ - struct amdgpu_recover_work_struct work = {.adev = adev, .job = job}; - - INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work); - - if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base)) - return -EAGAIN; - flush_work(&work.base); - - return work.ret; + atomic_set(&adev->reset_domain->reset_res, r); + return r; } /** @@ -5490,6 +5506,37 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } +/** + * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR + * + * @adev: amdgpu_device pointer + * @peer_adev: amdgpu_device pointer for peer device trying to access @adev + * + * Return true if @peer_adev can access (DMA) @adev through the PCIe + * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of + * @peer_adev. + */ +bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ +#ifdef CONFIG_HSA_AMD_P2P + uint64_t address_mask = peer_adev->dev->dma_mask ? 
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); + resource_size_t aper_limit = + adev->gmc.aper_base + adev->gmc.aper_size - 1; + bool p2p_access = !adev->gmc.xgmi.connected_to_cpu && + !(pci_p2pdma_distance_many(adev->pdev, + &peer_adev->dev, 1, true) < 0); + + return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size && + adev->gmc.real_vram_size == adev->gmc.visible_vram_size && + !(adev->gmc.aper_base & address_mask || + aper_limit & address_mask)); +#else + return false; +#endif +} + int amdgpu_device_baco_enter(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 47f0344205ed..95d34590cad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -194,6 +194,7 @@ static int hw_id_map[MAX_HWIP] = { [UMC_HWIP] = UMC_HWID, [XGMI_HWIP] = XGMI_HWID, [DCI_HWIP] = DCI_HWID, + [PCIE_HWIP] = PCIE_HWID, }; static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary) @@ -1435,6 +1436,11 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) return -EINVAL; } + /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES + * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES + * but that may change in the future with new GPUs so keep this + * check for defensive purposes. + */ if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) { dev_err(adev->dev, "invalid vcn instances\n"); return -EINVAL; @@ -1450,6 +1456,9 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) switch (le16_to_cpu(vcn_info->v1.header.version_major)) { case 1: + /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES + * so this won't overflow. 
+ */ for (v = 0; v < adev->vcn.num_vcn_inst; v++) { adev->vcn.vcn_codec_disable_mask[v] = le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits); @@ -1621,12 +1630,14 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 3): - case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; + case IP_VERSION(13, 0, 4): + amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block); + break; default: dev_err(adev->dev, "Failed to add psp ip block(MP0_HWIP:0x%x)\n", @@ -1707,8 +1718,11 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): + case IP_VERSION(3, 2, 0): + case IP_VERSION(3, 2, 1): amdgpu_device_ip_block_add(adev, &dm_ip_block); break; default: @@ -1886,6 +1900,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); break; case IP_VERSION(4, 0, 0): + case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block); @@ -2194,12 +2209,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - break; case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): @@ -2213,15 +2225,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): @@ -2321,6 +2330,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) switch (adev->ip_versions[LSDMA_HWIP][0]) { case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): adev->lsdma.funcs = &lsdma_v6_0_funcs; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 17c9bbe0cbc5..c20922a5af9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -30,6 +30,9 @@ #include "atom.h" #include "amdgpu_connectors.h" #include "amdgpu_display.h" +#include "soc15_common.h" +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" #include <asm/div64.h> #include <linux/pci.h> @@ -663,6 +666,11 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) { struct amdgpu_device *adev = drm_to_adev(afb->base.dev); uint64_t modifier = 0; + int num_pipes = 0; + int num_pkrs = 0; + + num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; + num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes; if 
(!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) { modifier = DRM_FORMAT_MOD_LINEAR; @@ -675,7 +683,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) int bank_xor_bits = 0; int packers = 0; int rb = 0; - int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pipes = ilog2(num_pipes); uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B); switch (swizzle >> 2) { @@ -691,12 +699,17 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) case 6: /* 64 KiB _X */ block_size_bits = 16; break; + case 7: /* 256 KiB */ + block_size_bits = 18; + break; default: /* RESERVED or VAR */ return -EINVAL; } - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) + version = AMD_FMT_MOD_TILE_VER_GFX11; + else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10; @@ -707,19 +720,32 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) case 0: /* Z microtiling */ return -EINVAL; case 1: /* S microtiling */ - if (!has_xor) - version = AMD_FMT_MOD_TILE_VER_GFX9; + if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) { + if (!has_xor) + version = AMD_FMT_MOD_TILE_VER_GFX9; + } break; case 2: - if (!has_xor && afb->base.format->cpp[0] != 4) - version = AMD_FMT_MOD_TILE_VER_GFX9; + if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) { + if (!has_xor && afb->base.format->cpp[0] != 4) + version = AMD_FMT_MOD_TILE_VER_GFX9; + } break; case 3: break; } if (has_xor) { + if (num_pipes == num_pkrs && num_pkrs == 0) { + DRM_ERROR("invalid number of pipes and packers\n"); + return -EINVAL; + } + switch (version) { + case AMD_FMT_MOD_TILE_VER_GFX11: + pipe_xor_bits = min(block_size_bits - 8, pipes); + packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); + break; case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: pipe_xor_bits = min(block_size_bits - 8, pipes); packers = min(block_size_bits - 8 - pipe_xor_bits, @@ -753,9 +779,10 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) u64 render_dcc_offset; /* Enable constant encode on RAVEN2 and later. */ - bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN || + bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN || (adev->asic_type == CHIP_RAVEN && - adev->external_rev_id >= 0x81); + adev->external_rev_id >= 0x81)) && + adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0); int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B : dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B : @@ -870,10 +897,11 @@ static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned, return max(10 + (rb_aligned ? 
(int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12); } case AMD_FMT_MOD_TILE_VER_GFX10: - case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: { + case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: + case AMD_FMT_MOD_TILE_VER_GFX11: { int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); - if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 && + if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 && AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2) ++pipes_log2; @@ -966,6 +994,9 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) case DC_SW_64KB_S_X: block_size_log2 = 16; break; + case DC_SW_VAR_S_X: + block_size_log2 = 18; + break; default: drm_dbg_kms(rfb->base.dev, "Swizzle mode with unknown block size: %d\n", swizzle); @@ -1528,6 +1559,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc, stime, etime, mode); } +static bool +amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) +{ + struct drm_device *dev = adev_to_drm(adev); + struct drm_fb_helper *fb_helper = dev->fb_helper; + + if (!fb_helper || !fb_helper->buffer) + return false; + + if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj) + return false; + + return true; +} + int amdgpu_display_suspend_helper(struct amdgpu_device *adev) { struct drm_device *dev = adev_to_drm(adev); @@ -1563,10 +1609,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) continue; } robj = gem_to_amdgpu_bo(fb->obj[0]); - r = amdgpu_bo_reserve(robj, true); - if (r == 0) { - amdgpu_bo_unpin(robj); - amdgpu_bo_unreserve(robj); + if (!amdgpu_display_robj_is_fb(adev, robj)) { + r = amdgpu_bo_reserve(robj, true); + if (r == 0) { + amdgpu_bo_unpin(robj); + amdgpu_bo_unreserve(robj); + } } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 7b6d83e2b13c..560352f7c317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -35,8 +35,6 @@ #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) -int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); void amdgpu_display_update_priority(struct amdgpu_device *adev); uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, uint64_t bo_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8890300766a5..429fcdf28836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -80,7 +80,7 @@ * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. - * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. 
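/*
 * Editor's sketch, not part of the patch: the IP_VERSION() comparisons used
 * throughout these hunks, and the ip_discovery_version that the 3.48.0
 * interface bump below starts reporting through the HW_IP info query, are
 * built from amdgpu's packed major/minor/revision encoding.  Assuming the
 * (major << 16 | minor << 8 | revision) packing of amdgpu's IP_VERSION()
 * macro, a standalone round-trip looks like this:
 */
#include <stdint.h>
#include <stdio.h>

#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

int main(void)
{
	/* e.g. the PSP 13.0.4 block that gains its own ip block above */
	uint32_t ver = IP_VERSION(13, 0, 4);

	printf("packed 0x%08x -> %u.%u.%u\n", (unsigned)ver,
	       (unsigned)(ver >> 16), (unsigned)((ver >> 8) & 0xff),
	       (unsigned)(ver & 0xff));
	return 0;
}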
@@ -100,10 +100,11 @@ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface * - 3.46.0 - To enable hot plug amdgpu tests in libdrm - * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.48.0 - Add IP discovery version info to HW INFO */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 47 +#define KMS_DRIVER_MINOR 48 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -167,6 +168,7 @@ int amdgpu_smu_pptable_id = -1; */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; +uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp; int amdgpu_discovery = -1; @@ -803,6 +805,16 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm #endif /** + * DOC: pcie_p2p (bool) + * Enable PCIe P2P (requires large-BAR). Default value: true (on) + */ +#ifdef CONFIG_HSA_AMD_P2P +bool pcie_p2p = true; +module_param(pcie_p2p, bool, 0444); +MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))"); +#endif + +/** * DOC: dcfeaturemask (uint) * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h. * The default is the current set of stable display features. @@ -817,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); +MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)"); +module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); + /** * DOC: abmlevel (uint) * Override the default ABM (Adaptive Backlight Management) level used for DC @@ -2111,7 +2126,7 @@ retry_init: if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ if (amdgpu_device_supports_px(ddev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); @@ -2168,7 +2183,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -2451,7 +2466,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret, i; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } @@ -2520,7 +2535,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret; - if (!adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) return -EINVAL; /* Avoids registers access if device is physically gone */ @@ -2564,7 +2579,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ int ret = 1; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d16c8c1f72db..8adeb7469f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -39,6 +39,7 @@ #include <drm/drm_drv.h> #include "amdgpu.h" #include 
"amdgpu_trace.h" +#include "amdgpu_reset.h" /* * Fences @@ -46,7 +47,7 @@ * for GPU/CPU synchronization. When the fence is written, * it is expected that all buffers associated with that fence * are no longer in use by the associated ring on the GPU and - * that the the relevant GPU caches have been flushed. + * that the relevant GPU caches have been flushed. */ struct amdgpu_fence { @@ -163,11 +164,16 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd if (job && job->job_run_counter) { /* reinit seq for resubmitted jobs */ fence->seqno = seq; + /* TO be inline with external fence creation and other drivers */ + dma_fence_get(fence); } else { - if (job) + if (job) { dma_fence_init(fence, &amdgpu_job_fence_ops, &ring->fence_drv.lock, adev->fence_context + ring->idx, seq); + /* Against remove in amdgpu_job_{free, free_cb} */ + dma_fence_get(fence); + } else dma_fence_init(fence, &amdgpu_fence_ops, &ring->fence_drv.lock, @@ -531,6 +537,24 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev) } } +/* Will either stop and flush handlers for amdgpu interrupt or reanble it */ +void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop) +{ + int i; + + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src) + continue; + + if (stop) + disable_irq(adev->irq.irq); + else + enable_irq(adev->irq.irq); + } +} + void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev) { unsigned int i, j; @@ -594,8 +618,10 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { ptr = &ring->fence_drv.fences[i]; old = rcu_dereference_protected(*ptr, 1); - if (old && old->ops == &amdgpu_job_fence_ops) + if (old && old->ops == &amdgpu_job_fence_ops) { RCU_INIT_POINTER(*ptr, NULL); + dma_fence_put(old); + } } } @@ -798,7 +824,10 @@ static int gpu_recover_get(void *data, u64 *val) return 0; } - *val = amdgpu_device_gpu_recover(adev, NULL); + if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work)) + flush_work(&adev->reset_work); + + *val = atomic_read(&adev->reset_domain->reset_res); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -810,6 +839,21 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info); DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL, "%lld\n"); +static void amdgpu_debugfs_reset_work(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + reset_work); + + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); +} + #endif void amdgpu_debugfs_fence_init(struct amdgpu_device *adev) @@ -821,9 +865,12 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev) debugfs_create_file("amdgpu_fence_info", 0444, root, adev, &amdgpu_debugfs_fence_info_fops); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { + + INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work); debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev, &amdgpu_debugfs_gpu_recover_fops); + } #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index ecada5eadfe3..e325150879df 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -66,10 +66,15 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) return true; case CHIP_SIENNA_CICHLID: if (strnstr(atom_ctx->vbios_version, "D603", + sizeof(atom_ctx->vbios_version))) { + if (strnstr(atom_ctx->vbios_version, "D603GLXE", sizeof(atom_ctx->vbios_version))) - return true; - else + return false; + else + return true; + } else { return false; + } default: return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 16699158e00d..222d3d7ea076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -142,7 +142,12 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s } } -static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) +static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev) +{ + return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1; +} + +static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) { if (amdgpu_compute_multipipe != -1) { DRM_INFO("amdgpu: forcing compute pipe policy %d\n", @@ -158,6 +163,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) return adev->gfx.mec.num_mec > 1; } +bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int queue = ring->queue; + int pipe = ring->pipe; + + /* Policy: use pipe1 queue0 as high priority graphics queue if we + * have more than one gfx pipe. + */ + if (amdgpu_gfx_is_graphics_multipipe_capable(adev) && + adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) { + int me = ring->me; + int bit; + + bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue); + if (ring == &adev->gfx.gfx_ring[bit]) + return true; + } + + return false; +} + bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -174,7 +201,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) { int i, queue, pipe; - bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); + bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev); int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe, adev->gfx.num_compute_rings); @@ -200,18 +227,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev) { - int i, queue, me; - - for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) { - queue = i % adev->gfx.me.num_queue_per_pipe; - me = (i / adev->gfx.me.num_queue_per_pipe) - / adev->gfx.me.num_pipe_per_me; + int i, queue, pipe; + bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev); + int max_queues_per_me = adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; - if (me >= adev->gfx.me.num_me) - break; + if (multipipe_policy) { /* policy: amdgpu owns the first queue per pipe at this stage * will extend to mulitple queues per pipe later */ - if (me == 0 && queue < 1) + for (i = 0; i < max_queues_per_me; i++) { + pipe = i % adev->gfx.me.num_pipe_per_me; + queue = (i / adev->gfx.me.num_pipe_per_me) % + adev->gfx.me.num_queue_per_pipe; + + set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue, + adev->gfx.me.queue_bitmap); + } + } else { + for (i = 0; i < max_queues_per_me; 
++i) set_bit(i, adev->gfx.me.queue_bitmap); } @@ -666,6 +699,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (amdgpu_device_skip_hw_access(adev)) return 0; + if (adev->mes.ring.sched.ready) + return amdgpu_mes_rreg(adev, reg); + BUG_ON(!ring->funcs->emit_rreg); spin_lock_irqsave(&kiq->ring_lock, flags); @@ -733,6 +769,11 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) if (amdgpu_device_skip_hw_access(adev)) return; + if (adev->mes.ring.sched.ready) { + amdgpu_mes_wreg(adev, reg, v); + return; + } + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 53526ffb2ce1..23a696d38390 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -396,6 +396,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring); +bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring); int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 3df146579ad9..1d5af50331e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -242,7 +242,7 @@ restart_ih: * @entry: IV entry * * Decodes the interrupt vector at the current rptr - * position and also advance the position for for Vega10 + * position and also advance the position for Vega10 * and later GPUs. 
*/ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h index 56cf127cdf93..484e936812e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h @@ -24,12 +24,18 @@ #ifndef __AMDGPU_IMU_H__ #define __AMDGPU_IMU_H__ +enum imu_work_mode { + DEBUG_MODE, + MISSION_MODE +}; + struct amdgpu_imu_funcs { int (*init_microcode)(struct amdgpu_device *adev); int (*load_microcode)(struct amdgpu_device *adev); void (*setup_imu)(struct amdgpu_device *adev); int (*start_imu)(struct amdgpu_device *adev); void (*program_rlc_ram)(struct amdgpu_device *adev); + int (*wait_for_reset_status)(struct amdgpu_device *adev); }; struct imu_rlc_ram_golden { @@ -46,6 +52,7 @@ struct imu_rlc_ram_golden { struct amdgpu_imu { const struct amdgpu_imu_funcs *funcs; + enum imu_work_mode mode; }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 67f66f2f1809..c2fd6f3076a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_reset.h" static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { @@ -64,7 +65,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ti.process_name, ti.tgid, ti.task_name, ti.pid); if (amdgpu_device_should_recover_gpu(ring->adev)) { - r = amdgpu_device_gpu_recover_imp(ring->adev, job); + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) DRM_ERROR("GPU Recovery Failed: %d\n", r); } else { @@ -125,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; - struct dma_fence *hw_fence; unsigned i; - if (job->hw_fence.ops == NULL) - hw_fence = job->external_hw_fence; - else - hw_fence = &job->hw_fence; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; + f = job->base.s_fence ? 
&job->base.s_fence->finished : &job->hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -148,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } void amdgpu_job_free(struct amdgpu_job *job) @@ -161,11 +159,10 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else + if (!job->hw_fence.ops) kfree(job); + else + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -195,15 +192,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, int r; job->base.sched = &ring->sched; - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - /* record external_hw_fence for direct submit */ - job->external_hw_fence = dma_fence_get(*fence); + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence); + if (r) return r; amdgpu_job_free(job); - dma_fence_put(*fence); - return 0; } @@ -262,10 +256,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) DRM_ERROR("Error scheduling IBs (%d)\n", r); } - if (!job->job_run_counter) - dma_fence_get(fence); - else if (finished->error < 0) - dma_fence_put(&job->hw_fence); job->job_run_counter++; amdgpu_job_free_resources(job); @@ -285,10 +275,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) /* Signal all jobs not yet scheduled */ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { struct drm_sched_rq *rq = &sched->sched_rq[i]; - - if (!rq) - continue; - spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d599c0540b46..babc0af751c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -50,7 +50,6 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct dma_fence hw_fence; - struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 6de63ea6687e..1369c25448dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,17 +43,6 @@ #include "amdgpu_display.h" #include "amdgpu_ras.h" -static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev) -{ - /* - * Add below quirk on several sienna_cichlid cards to disable - * runtime pm to fix EMI failures. 
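With external_hw_fence gone, every amdgpu_job owns the fence embedded in the job itself: amdgpu_job_free() only kfree()s the job when that fence was never initialized, and otherwise drops the fence reference so the fence release path reclaims the job. A small stand-alone model of that ownership rule (simplified stand-in types, not the real dma_fence):

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for dma_fence: only the pieces needed to model ownership. */
struct fence_model {
	const void *ops;	/* non-NULL once the fence was initialized */
	int refcount;
};

struct job_model {
	struct fence_model hw_fence;	/* embedded, as in struct amdgpu_job */
};

static void fence_put(struct job_model *job)
{
	/* Dropping the last reference frees the containing job, like dma_fence release. */
	if (--job->hw_fence.refcount == 0)
		free(job);
}

/* Mirrors amdgpu_job_free(): kfree() only if the embedded fence was never used. */
static void job_free(struct job_model *job)
{
	if (!job->hw_fence.ops)
		free(job);
	else
		fence_put(job);
}

int main(void)
{
	struct job_model *job = calloc(1, sizeof(*job));

	job->hw_fence.ops = (const void *)0x1;	/* pretend the fence was emitted */
	job->hw_fence.refcount = 1;
	job_free(job);
	return 0;
}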
- */ - if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || - ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF))) - adev->runpm = false; -} - void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -158,37 +147,36 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm != 0)) { switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - /* enable runpm if runpm=1 */ + /* enable BACO as runpm mode if runpm=1 */ if (amdgpu_runtime_pm > 0) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; case CHIP_VEGA10: - /* turn runpm on if noretry=0 */ + /* enable BACO as runpm mode if noretry=0 */ if (!adev->gmc.noretry) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; default: - /* enable runpm on CI+ */ - adev->runpm = true; + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; } - amdgpu_runtime_pm_quirk(adev); - - if (adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) dev_info(adev->dev, "Using BACO for runtime pm\n"); } @@ -473,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, result->hw_ip_version_major = adev->ip_blocks[i].version->major; result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; + + if (adev->asic_type >= CHIP_VEGA10) { + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_SDMA: + result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_JPEG: + result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_VCE: + result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; + break; + default: + result->ip_discovery_version = 0; + break; + } + } else { + result->ip_discovery_version = 0; + } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; result->ib_start_alignment = ib_start_alignment; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 69a70a0aaed9..fe82b8b19a4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -114,8 +114,14 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) size_t doorbell_start_offset; size_t doorbell_aperture_size; size_t doorbell_process_limit; + size_t aggregated_doorbell_start; + int i; - doorbell_start_offset = (adev->doorbell_index.max_assignment+1) * sizeof(u32); + aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32); + aggregated_doorbell_start = + roundup(aggregated_doorbell_start, PAGE_SIZE); + + doorbell_start_offset = 
aggregated_doorbell_start + PAGE_SIZE; doorbell_start_offset = roundup(doorbell_start_offset, amdgpu_mes_doorbell_process_slice(adev)); @@ -135,6 +141,11 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32); adev->mes.max_doorbell_slices = doorbell_process_limit; + /* allocate Qword range for aggregated doorbell */ + for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) + adev->mes.aggregated_doorbells[i] = + aggregated_doorbell_start / sizeof(u32) + i * 2; + DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit); return 0; } @@ -150,6 +161,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); + spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; @@ -173,9 +185,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) - adev->mes.agreegated_doorbells[i] = 0xffffffff; - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); if (r) { dev_err(adev->dev, @@ -189,15 +198,29 @@ int amdgpu_mes_init(struct amdgpu_device *adev) r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); if (r) { + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); dev_err(adev->dev, "(%d) query_status_fence_offs wb alloc failed\n", r); - return r; + goto error_ids; } adev->mes.query_status_fence_gpu_addr = adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); adev->mes.query_status_fence_ptr = (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; + r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); + if (r) { + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + dev_err(adev->dev, + "(%d) read_val_offs alloc failed\n", r); + goto error_ids; + } + adev->mes.read_val_gpu_addr = + adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); + adev->mes.read_val_ptr = + (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs]; + r = amdgpu_mes_doorbell_init(adev); if (r) goto error; @@ -206,6 +229,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error: amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); error_ids: idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); @@ -218,6 +243,8 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); + amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); @@ -675,8 +702,10 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, queue_input.doorbell_offset = qprops->doorbell_off; queue_input.mqd_addr = queue->mqd_gpu_addr; queue_input.wptr_addr = qprops->wptr_gpu_addr; + queue_input.wptr_mc_addr = qprops->wptr_mc_addr; queue_input.queue_type = qprops->queue_type; queue_input.paging = qprops->paging; + queue_input.is_kfd_process = 0; r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); if (r) { @@ -696,6 +725,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, queue->queue_type = qprops->queue_type; queue->paging = qprops->paging; queue->gang = gang; + 
queue->ring->mqd_ptr = queue->mqd_cpu_ptr; list_add_tail(&queue->list, &gang->queue_list); amdgpu_mes_unlock(&adev->mes); @@ -774,8 +804,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct mes_unmap_legacy_queue_input queue_input; int r; - amdgpu_mes_lock(&adev->mes); - queue_input.action = action; queue_input.queue_type = ring->funcs->type; queue_input.doorbell_offset = ring->doorbell_index; @@ -788,7 +816,106 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, if (r) DRM_ERROR("failed to unmap legacy queue\n"); - amdgpu_mes_unlock(&adev->mes); + return r; +} + +uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) +{ + struct mes_misc_op_input op_input; + int r, val = 0; + + op_input.op = MES_MISC_OP_READ_REG; + op_input.read_reg.reg_offset = reg; + op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes rreg is not supported!\n"); + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to read reg (0x%x)\n", reg); + else + val = *(adev->mes.read_val_ptr); + +error: + return val; +} + +int amdgpu_mes_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t val) +{ + struct mes_misc_op_input op_input; + int r; + + op_input.op = MES_MISC_OP_WRITE_REG; + op_input.write_reg.reg_offset = reg; + op_input.write_reg.reg_value = val; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes wreg is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to write reg (0x%x)\n", reg); + +error: + return r; +} + +int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + struct mes_misc_op_input op_input; + int r; + + op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; + op_input.wrm_reg.reg0 = reg0; + op_input.wrm_reg.reg1 = reg1; + op_input.wrm_reg.ref = ref; + op_input.wrm_reg.mask = mask; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes reg_write_reg_wait is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to reg_write_reg_wait\n"); + +error: + return r; +} + +int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, + uint32_t val, uint32_t mask) +{ + struct mes_misc_op_input op_input; + int r; + + op_input.op = MES_MISC_OP_WRM_REG_WAIT; + op_input.wrm_reg.reg0 = reg; + op_input.wrm_reg.ref = val; + op_input.wrm_reg.mask = mask; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes reg wait is not supported!\n"); + r = -EINVAL; + goto error; + } + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to reg_write_reg_wait\n"); + +error: return r; } @@ -801,6 +928,8 @@ amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, props->hqd_base_gpu_addr = ring->gpu_addr; props->rptr_gpu_addr = ring->rptr_gpu_addr; props->wptr_gpu_addr = ring->wptr_gpu_addr; + props->wptr_mc_addr = + ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; props->queue_size = ring->ring_size; props->eop_gpu_addr = ring->eop_gpu_addr; props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; @@ -953,6 +1082,12 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, kfree(ring); } +uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, + enum amdgpu_mes_priority_level prio) +{ + return adev->mes.aggregated_doorbells[prio]; +} + int amdgpu_mes_ctx_alloc_meta_data(struct 
amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data) { @@ -961,7 +1096,8 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, r = amdgpu_bo_create_kernel(adev, sizeof(struct amdgpu_mes_ctx_meta_data), PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &ctx_data->meta_data_obj, NULL, + &ctx_data->meta_data_obj, + &ctx_data->meta_data_mc_addr, &ctx_data->meta_data_ptr); if (!ctx_data->meta_data_obj) return -ENOMEM; @@ -975,7 +1111,9 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) { if (ctx_data->meta_data_obj) - amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, NULL, NULL); + amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, + &ctx_data->meta_data_mc_addr, + &ctx_data->meta_data_ptr); } int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, @@ -1051,6 +1189,63 @@ error: return r; } +int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, + struct amdgpu_mes_ctx_data *ctx_data) +{ + struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; + struct amdgpu_bo *bo = ctx_data->meta_data_obj; + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdgpu_bo_list_entry vm_pd; + struct list_head list, duplicates; + struct dma_fence *fence = NULL; + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + long r = 0; + + INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&duplicates); + + tv.bo = &bo->tbo; + tv.num_shared = 2; + list_add(&tv.head, &list); + + amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); + + r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); + if (r) { + dev_err(adev->dev, "leaking bo va because " + "we fail to reserve bo (%ld)\n", r); + return r; + } + + amdgpu_vm_bo_del(adev, bo_va); + if (!amdgpu_vm_ready(vm)) + goto out_unlock; + + r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence); + if (r) + goto out_unlock; + if (fence) { + amdgpu_bo_fence(bo, fence, true); + fence = NULL; + } + + r = amdgpu_vm_clear_freed(adev, vm, &fence); + if (r || !fence) + goto out_unlock; + + dma_fence_wait(fence, false); + amdgpu_bo_fence(bo, fence, true); + dma_fence_put(fence); + +out_unlock: + if (unlikely(r < 0)) + dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); + ttm_eu_backoff_reservation(&ticket, &list); + + return r; +} + static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev, int pasid, int *gang_id, int queue_type, int num_queue, @@ -1157,7 +1352,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data); if (r) { DRM_ERROR("failed to alloc ctx meta data\n"); - goto error_pasid; + goto error_fini; } ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; @@ -1212,9 +1407,9 @@ error_queues: amdgpu_mes_destroy_process(adev, pasid); error_vm: - BUG_ON(amdgpu_bo_reserve(ctx_data.meta_data_obj, true)); - amdgpu_vm_bo_del(adev, ctx_data.meta_data_va); - amdgpu_bo_unreserve(ctx_data.meta_data_obj); + amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data); + +error_fini: amdgpu_vm_fini(adev, vm); error_pasid: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 25590b301f25..7b46f6bf4187 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -33,6 +33,13 @@ #define AMDGPU_MES_MAX_GFX_PIPES 2 #define AMDGPU_MES_MAX_SDMA_PIPES 2 +#define AMDGPU_MES_API_VERSION_SHIFT 12 +#define AMDGPU_MES_FEAT_VERSION_SHIFT 24 + +#define AMDGPU_MES_VERSION_MASK 0x00000fff +#define AMDGPU_MES_API_VERSION_MASK 0x00fff000 
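The new MES version macros pack three fields into one 32-bit word: a 12-bit scheduler version, a 12-bit API version, and an 8-bit feature version (the FEAT mask continues in the next hunk line). A self-contained sketch of unpacking such a word with these shifts and masks:

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_MES_API_VERSION_SHIFT	12
#define AMDGPU_MES_FEAT_VERSION_SHIFT	24

#define AMDGPU_MES_VERSION_MASK		0x00000fff
#define AMDGPU_MES_API_VERSION_MASK	0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK	0xff000000

int main(void)
{
	uint32_t packed = 0x0502a042;	/* arbitrary example value */

	unsigned sched = packed & AMDGPU_MES_VERSION_MASK;
	unsigned api   = (packed & AMDGPU_MES_API_VERSION_MASK) >>
			 AMDGPU_MES_API_VERSION_SHIFT;
	unsigned feat  = (packed & AMDGPU_MES_FEAT_VERSION_MASK) >>
			 AMDGPU_MES_FEAT_VERSION_SHIFT;

	printf("sched %u, api %u, feat %u\n", sched, api, feat);
	return 0;
}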
+#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 + enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1, @@ -65,6 +72,9 @@ struct amdgpu_mes { spinlock_t queue_id_lock; + uint32_t sched_version; + uint32_t kiq_version; + uint32_t total_max_queue; uint32_t doorbell_id_offset; uint32_t max_doorbell_slices; @@ -73,6 +83,7 @@ struct amdgpu_mes { uint64_t default_gang_quantum; struct amdgpu_ring ring; + spinlock_t ring_lock; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -102,13 +113,17 @@ struct amdgpu_mes { uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES]; uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; - uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; + uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; uint32_t sch_ctx_offs; uint64_t sch_ctx_gpu_addr; uint64_t *sch_ctx_ptr; uint32_t query_status_fence_offs; uint64_t query_status_fence_gpu_addr; uint64_t *query_status_fence_ptr; + uint32_t read_val_offs; + uint64_t read_val_gpu_addr; + uint32_t *read_val_ptr; + uint32_t saved_flags; /* initialize kiq pipe */ @@ -166,6 +181,7 @@ struct amdgpu_mes_queue_properties { uint64_t hqd_base_gpu_addr; uint64_t rptr_gpu_addr; uint64_t wptr_gpu_addr; + uint64_t wptr_mc_addr; uint32_t queue_size; uint64_t eop_gpu_addr; uint32_t hqd_pipe_priority; @@ -198,12 +214,14 @@ struct mes_add_queue_input { uint32_t doorbell_offset; uint64_t mqd_addr; uint64_t wptr_addr; + uint64_t wptr_mc_addr; uint32_t queue_type; uint32_t paging; uint32_t gws_base; uint32_t gws_size; uint64_t tba_addr; uint64_t tma_addr; + uint32_t is_kfd_process; }; struct mes_remove_queue_input { @@ -233,6 +251,36 @@ struct mes_resume_gang_input { uint64_t gang_context_addr; }; +enum mes_misc_opcode { + MES_MISC_OP_WRITE_REG, + MES_MISC_OP_READ_REG, + MES_MISC_OP_WRM_REG_WAIT, + MES_MISC_OP_WRM_REG_WR_WAIT, +}; + +struct mes_misc_op_input { + enum mes_misc_opcode op; + + union { + struct { + uint32_t reg_offset; + uint64_t buffer_addr; + } read_reg; + + struct { + uint32_t reg_offset; + uint32_t reg_value; + } write_reg; + + struct { + uint32_t ref; + uint32_t mask; + uint32_t reg0; + uint32_t reg1; + } wrm_reg; + }; +}; + struct amdgpu_mes_funcs { int (*add_hw_queue)(struct amdgpu_mes *mes, struct mes_add_queue_input *input); @@ -248,6 +296,9 @@ struct amdgpu_mes_funcs { int (*resume_gang)(struct amdgpu_mes *mes, struct mes_resume_gang_input *input); + + int (*misc_op)(struct amdgpu_mes *mes, + struct mes_misc_op_input *input); }; #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) @@ -280,6 +331,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, enum amdgpu_unmap_queues_action action, u64 gpu_addr, u64 seq); +uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg); +int amdgpu_mes_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t val); +int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, + uint32_t val, uint32_t mask); +int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask); + int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, struct amdgpu_mes_ctx_data *ctx_data, @@ -287,12 +347,17 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, void amdgpu_mes_remove_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring); +uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, + enum 
amdgpu_mes_priority_level prio); + int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data); void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data); int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_mes_ctx_data *ctx_data); +int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, + struct amdgpu_mes_ctx_data *ctx_data); int amdgpu_mes_self_test(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h index c000f656aae5..912a5be2ece6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h @@ -107,6 +107,7 @@ struct amdgpu_mes_ctx_meta_data { struct amdgpu_mes_ctx_data { struct amdgpu_bo *meta_data_obj; uint64_t meta_data_gpu_addr; + uint64_t meta_data_mc_addr; struct amdgpu_bo_va *meta_data_va; void *meta_data_ptr; uint32_t gang_ids[AMDGPU_HW_IP_DMA+1]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index f80b4838cea1..d788a00043a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -37,6 +37,7 @@ #include <drm/drm_fixed.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_framebuffer.h> #include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> #include <linux/i2c.h> @@ -349,15 +350,11 @@ struct amdgpu_mode_info { #define AMDGPU_MAX_BL_LEVEL 0xFF -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - struct amdgpu_backlight_privdata { struct amdgpu_encoder *encoder; uint8_t negative; }; -#endif - struct amdgpu_atom_ss { uint16_t percentage; uint16_t percentage_divider; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2c82b1d5a0d7..4570ad449390 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -882,6 +882,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (WARN_ON_ONCE(min_offset > max_offset)) return -EINVAL; + /* Check domain to be pinned to against preferred domains */ + if (bo->preferred_domains & domain) + domain = bo->preferred_domains & domain; + /* A shared bo cannot be migrated to VRAM */ if (bo->tbo.base.import_attach) { if (domain & AMDGPU_GEM_DOMAIN_GTT) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e9411c28d88b..9f7a5e393f85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -37,6 +37,7 @@ #include "psp_v11_0_8.h" #include "psp_v12_0.h" #include "psp_v13_0.h" +#include "psp_v13_0_4.h" #include "amdgpu_ras.h" #include "amdgpu_securedisplay.h" @@ -151,6 +152,10 @@ static int psp_early_init(void *handle) psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; + case IP_VERSION(13, 0, 4): + psp_v13_0_4_set_psp_funcs(psp); + psp->autoload_supported = true; + break; default: return -EINVAL; } @@ -1292,6 +1297,8 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp, break; } + + amdgpu_put_xgmi_hive(hive); } int psp_xgmi_get_topology_info(struct psp_context *psp, @@ -2168,6 +2175,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_RLC_DRAM: *type = GFX_FW_TYPE_RLC_DRAM_BOOT; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS; 
+ break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + *type = GFX_FW_TYPE_SE0_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + *type = GFX_FW_TYPE_SE1_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + *type = GFX_FW_TYPE_SE2_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + *type = GFX_FW_TYPE_SE3_TAP_DELAYS; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; @@ -2348,6 +2370,13 @@ static int psp_load_smu_fw(struct psp_context *psp) &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; struct amdgpu_ras *ras = psp->ras_context.ras; + /* + * Skip SMU FW reloading in case of using BACO for runpm only, + * as SMU is always alive. + */ + if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + return 0; + if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; @@ -2372,7 +2401,7 @@ static int psp_load_smu_fw(struct psp_context *psp) static bool fw_load_skip_check(struct psp_context *psp, struct amdgpu_firmware_info *ucode) { - if (!ucode->fw) + if (!ucode->fw || !ucode->ucode_size) return true; if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && @@ -2612,6 +2641,9 @@ static int psp_hw_fini(void *handle) psp_rap_terminate(psp); psp_dtm_terminate(psp); psp_hdcp_terminate(psp); + + if (adev->gmc.xgmi.num_physical_nodes > 1) + psp_xgmi_terminate(psp); } psp_asd_terminate(psp); @@ -3670,3 +3702,11 @@ const struct amdgpu_ip_block_version psp_v13_0_ip_block = { .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 13, + .minor = 0, + .rev = 4, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e431f4994931..c32b74bd970f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -69,8 +69,8 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, - PSP_BL__LOAD_INTFDRV = 0xC0000, - PSP_BL__LOAD_DBGDRV = 0xD0000, + PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_INTFDRV = 0xD0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -439,6 +439,7 @@ extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block; extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index dac202ae864d..ff5361f5c2d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -35,6 +35,8 @@ #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "atom.h" +#include "amdgpu_reset.h" + #ifdef CONFIG_X86_MCE_AMD #include <asm/mce.h> @@ -715,27 +717,30 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!con) return -EINVAL; - info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); - if (!info) - return -ENOMEM; + if (head->block == AMDGPU_RAS_BLOCK__GFX) { + info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); + if (!info) + return -ENOMEM; - if (!enable) { - info->disable_features = (struct ta_ras_disable_features_input) { - .block_id = 
amdgpu_ras_block_to_ta(head->block), - .error_type = amdgpu_ras_error_to_ta(head->type), - }; - } else { - info->enable_features = (struct ta_ras_enable_features_input) { - .block_id = amdgpu_ras_block_to_ta(head->block), - .error_type = amdgpu_ras_error_to_ta(head->type), - }; + if (!enable) { + info->disable_features = (struct ta_ras_disable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } else { + info->enable_features = (struct ta_ras_enable_features_input) { + .block_id = amdgpu_ras_block_to_ta(head->block), + .error_type = amdgpu_ras_error_to_ta(head->type), + }; + } } /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); /* Only enable ras feature operation handle on host side */ - if (!amdgpu_sriov_vf(adev) && + if (head->block == AMDGPU_RAS_BLOCK__GFX && + !amdgpu_sriov_vf(adev) && !amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { @@ -751,7 +756,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, __amdgpu_ras_feature_enable(adev, head, enable); ret = 0; out: - kfree(info); + if (head->block == AMDGPU_RAS_BLOCK__GFX) + kfree(info); return ret; } @@ -1936,8 +1942,16 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_put_xgmi_hive(hive); } - if (amdgpu_device_should_recover_gpu(ras->adev)) - amdgpu_device_gpu_recover(ras->adev, NULL); + if (amdgpu_device_should_recover_gpu(ras->adev)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); + } atomic_set(&ras->in_recovery, 0); } @@ -2148,7 +2162,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) bool exc_err_limit = false; int ret; - if (!con) + if (!con || amdgpu_sriov_vf(adev)) return 0; /* Allow access to RAS EEPROM via debugfs, when the ASIC @@ -2946,7 +2960,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) - schedule_work(&ras->recovery_work); + amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b9a6fac2b8b2..bf5a95104ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -328,10 +328,16 @@ struct ecc_info_per_ch { uint16_t ce_count_hi_chip; uint64_t mca_umc_status; uint64_t mca_umc_addr; + uint64_t mca_ceumc_addr; }; struct umc_ecc_info { struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM]; + + /* Determine smu ecctable whether support + * record correctable error address + */ + int record_ce_addr_supported; }; struct amdgpu_ras { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index c80af0889773..32c86a0b145c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -132,6 +132,7 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d } atomic_set(&reset_domain->in_gpu_reset, 0); + atomic_set(&reset_domain->reset_res, 0); init_rwsem(&reset_domain->sem); return reset_domain; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 1949dbe28a86..ffda1560c648 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -37,6 +37,7 @@ struct amdgpu_reset_context { struct amdgpu_device *reset_req_dev; struct amdgpu_job *job; struct amdgpu_hive_info *hive; + struct list_head *reset_device_list; unsigned long flags; }; @@ -82,6 +83,7 @@ struct amdgpu_reset_domain { enum amdgpu_reset_domain_type type; struct rw_semaphore sem; atomic_t in_gpu_reset; + atomic_t reset_res; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 13db99d653bd..d3558c34d406 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -543,12 +543,12 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, */ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { - prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; - prop->hqd_queue_priority = - AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; - } + if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && + amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) || + (ring->funcs->type == AMDGPU_RING_TYPE_GFX && + amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) { + prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; + prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7d89a52091c0..82c178a9033a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -143,6 +143,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); +void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop); /* * Rings. 
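The amdgpu_ring.c hunk above widens the high-priority MQD setup from compute-only rings to compute or graphics rings. A compressed stand-alone model of that selection (stand-in enums and a single flag in place of the amdgpu_gfx_is_high_priority_* predicates):

#include <stdbool.h>
#include <stdio.h>

enum ring_type { RING_TYPE_GFX, RING_TYPE_COMPUTE, RING_TYPE_OTHER };

struct ring_model {
	enum ring_type type;
	bool high_priority;	/* models the amdgpu_gfx_is_high_priority_* checks */
};

struct mqd_prop_model {
	int hqd_pipe_priority;	/* 0 = normal, 1 = high */
	int hqd_queue_priority;	/* 0 = normal, 15 = maximum */
};

static void ring_to_mqd_prop(const struct ring_model *ring,
			     struct mqd_prop_model *prop)
{
	prop->hqd_pipe_priority = 0;
	prop->hqd_queue_priority = 0;

	/* Same shape as the reworked condition: high-priority compute OR gfx. */
	if ((ring->type == RING_TYPE_COMPUTE && ring->high_priority) ||
	    (ring->type == RING_TYPE_GFX && ring->high_priority)) {
		prop->hqd_pipe_priority = 1;
		prop->hqd_queue_priority = 15;
	}
}

int main(void)
{
	struct ring_model gfx_hp = { RING_TYPE_GFX, true };
	struct mqd_prop_model prop;

	ring_to_mqd_prop(&gfx_hp, &prop);
	printf("pipe %d queue %d\n", prop.hqd_pipe_priority, prop.hqd_queue_priority);
	return 0;
}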
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index f6fd9e1a7dac..03ac36b2c2cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -222,6 +222,11 @@ struct amdgpu_rlc { u32 rlc_dram_ucode_size_bytes; u32 rlcp_ucode_size_bytes; u32 rlcv_ucode_size_bytes; + u32 global_tap_delays_ucode_size_bytes; + u32 se0_tap_delays_ucode_size_bytes; + u32 se1_tap_delays_ucode_size_bytes; + u32 se2_tap_delays_ucode_size_bytes; + u32 se3_tap_delays_ucode_size_bytes; u32 *register_list_format; u32 *register_restore; @@ -232,6 +237,11 @@ struct amdgpu_rlc { u8 *rlc_dram_ucode; u8 *rlcp_ucode; u8 *rlcv_ucode; + u8 *global_tap_delays_ucode; + u8 *se0_tap_delays_ucode; + u8 *se1_tap_delays_ucode; + u8 *se2_tap_delays_ucode; + u8 *se3_tap_delays_ucode; bool is_rlc_v2_1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 3b4c19412625..134575a3893c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -637,6 +637,8 @@ struct amdgpu_ttm_tt { #endif }; +#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) + #ifdef CONFIG_DRM_AMDGPU_USERPTR /* * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user @@ -648,7 +650,7 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { struct ttm_tt *ttm = bo->tbo.ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long start = gtt->userptr; struct vm_area_struct *vma; struct mm_struct *mm; @@ -702,7 +704,7 @@ out_unlock: */ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); bool r = false; if (!gtt || !gtt->userptr) @@ -751,7 +753,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -788,7 +790,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -822,7 +824,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); struct ttm_tt *ttm = tbo->ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (amdgpu_bo_encrypted(abo)) flags |= AMDGPU_PTE_TMZ; @@ -860,7 +862,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct ttm_resource *bo_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void*)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); uint64_t flags; int r; @@ -927,7 +929,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); struct ttm_placement placement; struct ttm_place placements; struct ttm_resource *tmp; @@ -998,7 +1000,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { @@ -1025,7 +1027,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt->usertask) put_task_struct(gtt->usertask); @@ -1079,7 +1081,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, struct ttm_operation_ctx *ctx) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); pgoff_t i; int ret; @@ -1113,7 +1115,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_device *adev; pgoff_t i; @@ -1182,7 +1184,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. 
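The amdgpu_ttm.c changes replace the open-coded (void *)ttm casts with a ttm_to_amdgpu_ttm_tt() helper built on container_of(), which recovers the wrapping structure from a pointer to its embedded member. A stand-alone illustration of the same pattern with simplified structures (not the real TTM types):

#include <stddef.h>
#include <stdio.h>

/* Simplified container_of; the kernel version adds type checking. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Simplified stand-ins for struct ttm_tt / struct amdgpu_ttm_tt. */
struct ttm_tt {
	unsigned int num_pages;
};

struct amdgpu_ttm_tt {
	struct ttm_tt ttm;	/* the embedded member named in the macro below */
	unsigned long userptr;
};

#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)

int main(void)
{
	struct amdgpu_ttm_tt gtt = { .ttm = { .num_pages = 4 }, .userptr = 0x1000 };
	struct ttm_tt *ttm = &gtt.ttm;

	/* Recover the amdgpu wrapper from the embedded ttm_tt pointer. */
	struct amdgpu_ttm_tt *back = ttm_to_amdgpu_ttm_tt(ttm);

	printf("userptr 0x%lx\n", back->userptr);
	return 0;
}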
*/ bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL; - gtt = (void *)bo->ttm; + gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); gtt->userptr = addr; gtt->userflags = flags; @@ -1199,7 +1201,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return NULL; @@ -1218,7 +1220,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end, unsigned long *userptr) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1241,7 +1243,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, */ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL || !gtt->userptr) return false; @@ -1254,7 +1256,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index ffa4c0d207db..939c8614f0e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -486,26 +486,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_POLARIS12: case CHIP_VEGAM: return AMDGPU_FW_LOAD_SMU; - case CHIP_VEGA10: - case CHIP_RAVEN: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_ARCTURUS: - case CHIP_RENOIR: - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_NAVI12: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_VANGOGH: - case CHIP_DIMGREY_CAVEFISH: - case CHIP_ALDEBARAN: - case CHIP_BEIGE_GOBY: - case CHIP_YELLOW_CARP: - if (!load_type) - return AMDGPU_FW_LOAD_DIRECT; - else - return AMDGPU_FW_LOAD_PSP; case CHIP_CYAN_SKILLFISH: if (!(load_type && adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)) @@ -581,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "RLC_P"; case AMDGPU_UCODE_ID_RLC_V: return "RLC_V"; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + return "GLOBAL_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + return "SE0_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + return "SE1_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + return "SE2_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + return "SE3_TAP_DELAYS"; case AMDGPU_UCODE_ID_IMU_I: return "IMU_I"; case AMDGPU_UCODE_ID_IMU_D: @@ -765,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlcv_ucode; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.global_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode; + break; + case 
AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode; + break; case AMDGPU_UCODE_ID_CP_MES: ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index f510b6aa82ab..ebed3f5226db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 { uint32_t rlcv_ucode_offset_bytes; }; +/* version_major=2, version_minor=4 */ +struct rlc_firmware_header_v2_4 { + struct rlc_firmware_header_v2_3 v2_3; + uint32_t global_tap_delays_ucode_size_bytes; + uint32_t global_tap_delays_ucode_offset_bytes; + uint32_t se0_tap_delays_ucode_size_bytes; + uint32_t se0_tap_delays_ucode_offset_bytes; + uint32_t se1_tap_delays_ucode_size_bytes; + uint32_t se1_tap_delays_ucode_offset_bytes; + uint32_t se2_tap_delays_ucode_size_bytes; + uint32_t se2_tap_delays_ucode_offset_bytes; + uint32_t se3_tap_delays_ucode_size_bytes; + uint32_t se3_tap_delays_ucode_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MES1_DATA, AMDGPU_UCODE_ID_IMU_I, AMDGPU_UCODE_ID_IMU_D, + AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS, + AMDGPU_UCODE_ID_SE0_TAP_DELAYS, + AMDGPU_UCODE_ID_SE1_TAP_DELAYS, + AMDGPU_UCODE_ID_SE2_TAP_DELAYS, + AMDGPU_UCODE_ID_SE3_TAP_DELAYS, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 2ec6698aa1fe..3629d8f292ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -41,6 +41,12 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) +#define LOOP_UMC_NODE_INST(node_inst) \ + for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++) + +#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ + LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -62,6 +68,10 @@ struct amdgpu_umc { uint32_t channel_inst_num; /* number of umc instance with memory map register access */ uint32_t umc_inst_num; + + /*number of umc node instance with memory map register access*/ + uint32_t node_inst_num; + /* UMC regiser per channel offset */ uint32_t channel_offs; /* channel index table of interleaved memory */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index aa7acfabf360..f36e4f08db6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -54,6 +54,7 @@ #define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" #define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" #define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin" +#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin" #define 
FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); @@ -74,6 +75,7 @@ MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY); MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP); MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_0); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -185,6 +187,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case IP_VERSION(4, 0, 2): + fw_name = FIRMWARE_VCN4_0_2; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = false; + break; case IP_VERSION(4, 0, 4): fw_name = FIRMWARE_VCN4_0_4; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && @@ -329,6 +337,18 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } +/* from vcn4 and above, only unified queue is used */ +static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool ret = false; + + if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) + ret = true; + + return ret; +} + bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; @@ -718,19 +738,55 @@ error: return r; } +static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib, + uint32_t ib_pack_in_dw, bool enc) +{ + uint32_t *ib_checksum; + + ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */ + ib->ptr[ib->length_dw++] = 0x30000002; + ib_checksum = &ib->ptr[ib->length_dw++]; + ib->ptr[ib->length_dw++] = ib_pack_in_dw; + + ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */ + ib->ptr[ib->length_dw++] = 0x30000001; + ib->ptr[ib->length_dw++] = enc ? 
0x2 : 0x3; + ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t); + + return ib_checksum; +} + +static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum, + uint32_t ib_pack_in_dw) +{ + uint32_t i; + uint32_t checksum = 0; + + for (i = 0; i < ib_pack_in_dw; i++) + checksum += *(*ib_checksum + 2 + i); + + **ib_checksum = checksum; +} + static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_ib *ib_msg, struct dma_fence **fence) { struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; - const unsigned int ib_size_dw = 64; + unsigned int ib_size_dw = 64; struct amdgpu_device *adev = ring->adev; struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); + bool sq = amdgpu_vcn_using_unified_queue(ring); + uint32_t *ib_checksum; + uint32_t ib_pack_in_dw; int i, r; + if (sq) + ib_size_dw += 8; + r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -739,6 +795,13 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib = &job->ibs[0]; ib->length_dw = 0; + /* single queue headers */ + if (sq) { + ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + + 4 + 2; /* engine info + decoding ib in dw */ + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); + } + ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8; ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); @@ -752,6 +815,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err_free; @@ -838,13 +904,18 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct amdgpu_ib *ib_msg, struct dma_fence **fence) { - const unsigned ib_size_dw = 16; + unsigned int ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; uint64_t addr; + bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; + if (sq) + ib_size_dw += 8; + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -854,6 +925,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); ib->length_dw = 0; + + if (sq) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ ib->ptr[ib->length_dw++] = handle; @@ -873,6 +948,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -892,13 +970,18 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han struct amdgpu_ib *ib_msg, struct dma_fence **fence) { - const unsigned ib_size_dw = 16; + unsigned int ib_size_dw = 16; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; + uint32_t *ib_checksum = NULL; uint64_t addr; + bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; + if (sq) + ib_size_dw += 8; + r = 
amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, &job); if (r) @@ -908,6 +991,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); ib->length_dw = 0; + + if (sq) + ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); + ib->ptr[ib->length_dw++] = 0x00000018; ib->ptr[ib->length_dw++] = 0x00000001; ib->ptr[ib->length_dw++] = handle; @@ -927,6 +1014,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; + if (sq) + amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -977,6 +1067,20 @@ error: return r; } +int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + long r; + + r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); + if (r) + goto error; + + r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout); + +error: + return r; +} + enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) { switch(ring) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 6f90fcee0f9c..60c608144480 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -364,6 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a8ecf04389b3..9be57389301b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -76,6 +76,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; + if (adev->mes.ring.sched.ready) { + amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, + ref, mask); + return; + } + spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index dc76d2b3ce52..59cac347baa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -54,7 +54,7 @@ * (uncached system pages). * Each VM has an ID associated with it and there is a page table * associated with each VMID. When executing a command buffer, - * the kernel tells the the ring what VMID to use for that command + * the kernel tells the ring what VMID to use for that command * buffer. VMIDs are allocated dynamically as commands are submitted. * The userspace drivers maintain their own address space and the kernel * sets up their pages tables accordingly when they submit their @@ -2168,6 +2168,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) } else { vm->update_funcs = &amdgpu_vm_sdma_funcs; } + /* + * Make sure root PD gets mapped. As vm_update_mode could be changed + * when turning a GFX VM into a compute VM. 
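For the VCN unified queue support added above, each direct-submitted IB is prefixed with a small header (a single-queue checksum packet plus an engine-info packet), and the checksum dword is patched in after the payload has been packed. A self-contained sketch of that two-step pattern over a flat dword buffer, using the same magic values as the hunk above:

#include <stdint.h>
#include <stdio.h>

/*
 * Models the helpers added in amdgpu_vcn.c: write the single-queue header,
 * remember where the checksum dword lives, then sum the packed dwords and
 * patch the checksum in afterwards.
 */
static uint32_t *unified_ring_header(uint32_t *ib, uint32_t *len,
				     uint32_t pack_in_dw, int enc)
{
	uint32_t *checksum;

	ib[(*len)++] = 0x00000010;		/* single queue checksum */
	ib[(*len)++] = 0x30000002;
	checksum = &ib[(*len)++];		/* patched later */
	ib[(*len)++] = pack_in_dw;

	ib[(*len)++] = 0x00000010;		/* engine info */
	ib[(*len)++] = 0x30000001;
	ib[(*len)++] = enc ? 0x2 : 0x3;
	ib[(*len)++] = pack_in_dw * sizeof(uint32_t);

	return checksum;
}

static void unified_ring_checksum(uint32_t *checksum, uint32_t pack_in_dw)
{
	uint32_t sum = 0;

	/* The sum starts two dwords past the checksum slot, as in the driver. */
	for (uint32_t i = 0; i < pack_in_dw; i++)
		sum += *(checksum + 2 + i);
	*checksum = sum;
}

int main(void)
{
	uint32_t ib[64] = { 0 };
	uint32_t len = 0;
	uint32_t pack_in_dw = 4 + 2;		/* engine info + payload dwords */
	uint32_t *checksum = unified_ring_header(ib, &len, pack_in_dw, 0);

	ib[len++] = 0x1000;			/* fake payload dwords */
	ib[len++] = 0x1001;

	unified_ring_checksum(checksum, pack_in_dw);
	printf("checksum 0x%x\n", *checksum);
	return 0;
}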
+ */ + r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo)); + if (r) + goto unreserve_bo; + dma_fence_put(vm->last_update); vm->last_update = NULL; vm->is_compute_context = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 49e4092f447f..28ec5f8ac1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -50,6 +50,35 @@ to_amdgpu_device(struct amdgpu_vram_mgr *mgr) return container_of(mgr, struct amdgpu_device, mman.vram_mgr); } +static inline struct drm_buddy_block * +amdgpu_vram_mgr_first_block(struct list_head *list) +{ + return list_first_entry_or_null(list, struct drm_buddy_block, link); +} + +static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 start, size; + + block = amdgpu_vram_mgr_first_block(head); + if (!block) + return false; + + while (head != block->link.next) { + start = amdgpu_vram_mgr_block_start(block); + size = amdgpu_vram_mgr_block_size(block); + + block = list_entry(block->link.next, struct drm_buddy_block, link); + if (start + size != amdgpu_vram_mgr_block_start(block)) + return false; + } + + return true; +} + + + /** * DOC: mem_info_vram_total * @@ -366,11 +395,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, unsigned long pages_per_block; int r; - lpfn = place->lpfn << PAGE_SHIFT; + lpfn = (u64)place->lpfn << PAGE_SHIFT; if (!lpfn) lpfn = man->size; - fpfn = place->fpfn << PAGE_SHIFT; + fpfn = (u64)place->fpfn << PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) @@ -410,12 +439,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - remaining_size = vres->base.num_pages << PAGE_SHIFT; + remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT; mutex_lock(&mgr->lock); while (remaining_size) { if (tbo->page_alignment) - min_block_size = tbo->page_alignment << PAGE_SHIFT; + min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT; else min_block_size = mgr->default_page_size; @@ -424,12 +453,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Limit maximum size to 2GiB due to SG table limitations */ size = min(remaining_size, 2ULL << 30); - if (size >= pages_per_block << PAGE_SHIFT) - min_block_size = pages_per_block << PAGE_SHIFT; + if (size >= (u64)pages_per_block << PAGE_SHIFT) + min_block_size = (u64)pages_per_block << PAGE_SHIFT; cur_size = size; - if (fpfn + size != place->lpfn << PAGE_SHIFT) { + if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) { /* * Except for actual range allocation, modify the size and * min_block_size conforming to continuous flag enablement @@ -469,7 +498,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, LIST_HEAD(temp); trim_list = &vres->blocks; - original_size = vres->base.num_pages << PAGE_SHIFT; + original_size = (u64)vres->base.num_pages << PAGE_SHIFT; /* * If size value is rounded up to min_block_size, trim the last @@ -496,16 +525,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, list_splice_tail(trim_list, &vres->blocks); } - list_for_each_entry(block, &vres->blocks, link) - vis_usage += amdgpu_vram_mgr_vis_size(adev, block); + vres->base.start = 0; + list_for_each_entry(block, &vres->blocks, link) { + unsigned long start; - block = amdgpu_vram_mgr_first_block(&vres->blocks); - if (!block) { - r = -EINVAL; - goto 
error_fini; - } + start = amdgpu_vram_mgr_block_start(block) + + amdgpu_vram_mgr_block_size(block); + start >>= PAGE_SHIFT; + + if (start > vres->base.num_pages) + start -= vres->base.num_pages; + else + start = 0; + vres->base.start = max(vres->base.start, start); - vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, block); + } if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks)) vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 9a2db87186c7..0e04e42cf809 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -50,34 +50,7 @@ static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) { - return PAGE_SIZE << drm_buddy_block_order(block); -} - -static inline struct drm_buddy_block * -amdgpu_vram_mgr_first_block(struct list_head *list) -{ - return list_first_entry_or_null(list, struct drm_buddy_block, link); -} - -static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) -{ - struct drm_buddy_block *block; - u64 start, size; - - block = amdgpu_vram_mgr_first_block(head); - if (!block) - return false; - - while (head != block->link.next) { - start = amdgpu_vram_mgr_block_start(block); - size = amdgpu_vram_mgr_block_size(block); - - block = list_entry(block->link.next, struct drm_buddy_block, link); - if (start + size != amdgpu_vram_mgr_block_start(block)) - return false; - } - - return true; + return (u64)PAGE_SIZE << drm_buddy_block_order(block); } static inline struct amdgpu_vram_mgr_resource * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 1b108d03e785..f2aebbf3fbe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -742,7 +742,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) amdgpu_put_xgmi_hive(hive); } - return psp_xgmi_terminate(&adev->psp); + return 0; } static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index 33a8a7365aef..f0e235f98afb 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -28,13 +28,44 @@ #include "navi10_enum.h" #include "soc15_common.h" +#define regATHUB_MISC_CNTL_V3_0_1 0x00d7 +#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 + + +static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); + break; + default: + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + } + return data; +} + +static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); + break; + default: + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + } +} + static void athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & 
AMD_CG_SUPPORT_ATHUB_MGCG)) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } static void @@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; @@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } int athub_v3_0_set_clockgating(struct amdgpu_device *adev, @@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev, switch (adev->ip_versions[ATHUB_HWIP][0]) { case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 1): case IP_VERSION(3, 0, 2): athub_v3_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) int data; /* AMD_CG_SUPPORT_ATHUB_MGCG */ - data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + data = athub_v3_0_get_cg_cntl(adev); if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index d4f5a584075d..fa7421afb9a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -118,8 +118,6 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode } } -#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) - static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd) { u8 level; @@ -251,18 +249,6 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder) } } -#else /* !CONFIG_BACKLIGHT_CLASS_DEVICE */ - -void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *encoder) -{ -} - -void amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *encoder) -{ -} - -#endif - bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h index f3852b59b1d6..a8b29d33c464 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h @@ -39,7 +39,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] = 0x00000000, // DB_DEPTH_CLEAR 0x00000000, // PA_SC_SCREEN_SCISSOR_TL 0x40004000, // PA_SC_SCREEN_SCISSOR_BR - 0x00000000, // DB_DFSM_CONTROL + 0, // HOLE 0x00000000, // DB_RESERVED_REG_2 0x00000000, // DB_Z_INFO 0x00000000, // DB_STENCIL_INFO @@ -50,7 +50,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] = 0x00000000, // DB_RESERVED_REG_1 0x00000000, // DB_RESERVED_REG_3 0x00000000, // DB_SPI_VRS_CENTER_LOCATION - 0x00000000, // DB_VRS_OVERRIDE_CNTL + 0, // HOLE 0x00000000, // DB_Z_READ_BASE_HI 0x00000000, // DB_STENCIL_READ_BASE_HI 0x00000000, // DB_Z_WRITE_BASE_HI @@ -270,29 +270,29 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] = 0x00000000, // 
PA_SC_FSR_EN 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X 0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y - 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_VIEW + 0, // HOLE 0x00000000, // PA_SC_VRS_OVERRIDE_CNTL 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY 0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL - 0, // HOLE + 0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL 0, // HOLE 0, // HOLE 0x00000000, // PA_SC_VRS_RATE_BASE 0x00000000, // PA_SC_VRS_RATE_BASE_EXT 0x00000000, // PA_SC_VRS_RATE_SIZE_XY - 0x00000000, // PA_SC_VRS_RATE_VIEW - 0xffffffff, // VGT_MAX_VTX_INDX - 0x00000000, // VGT_MIN_VTX_INDX - 0x00000000, // VGT_INDX_OFFSET + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX 0x00550055, // CB_RMI_GL2_CACHE_CONTROL 0x00000000, // CB_BLEND_RED 0x00000000, // CB_BLEND_GREEN 0x00000000, // CB_BLEND_BLUE 0x00000000, // CB_BLEND_ALPHA - 0x00000000, // CB_DCC_CONTROL + 0x00000000, // CB_FDCC_CONTROL 0x00000000, // CB_COVERAGE_OUT_CONTROL 0x00000000, // DB_STENCIL_CONTROL 0x01000000, // DB_STENCILREFMASK @@ -470,8 +470,8 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] = 0x00000000, // SPI_BARYC_CNTL 0, // HOLE 0x00000000, // SPI_TMPRING_SIZE - 0, // HOLE - 0, // HOLE + 0x00000000, // SPI_GFX_SCRATCH_BASE_LO + 0x00000000, // SPI_GFX_SCRATCH_BASE_HI 0, // HOLE 0, // HOLE 0, // HOLE @@ -545,7 +545,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] = 0x00000000, // PA_STEREO_CNTL 0x00000000, // PA_STATE_STEREO_X 0x00000000, // PA_CL_VRS_CNTL - 0x00000000, // PA_SIDEBAND_REQUEST_DELAYS + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE @@ -658,30 +658,30 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] = 0x00000000, // PA_SU_POINT_MINMAX 0x00000000, // PA_SU_LINE_CNTL 0x00000000, // PA_SC_LINE_STIPPLE - 0x00000000, // VGT_OUTPUT_PATH_CNTL - 0x00000000, // VGT_HOS_CNTL + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_HOS_MAX_TESS_LEVEL 0x00000000, // VGT_HOS_MIN_TESS_LEVEL - 0x00000000, // VGT_HOS_REUSE_DEPTH - 0x00000000, // VGT_GROUP_PRIM_TYPE - 0x00000000, // VGT_GROUP_FIRST_DECR - 0x00000000, // VGT_GROUP_DECR - 0x00000000, // VGT_GROUP_VECT_0_CNTL - 0x00000000, // VGT_GROUP_VECT_1_CNTL - 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL - 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL - 0x00000000, // VGT_GS_MODE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_GS_ONCHIP_CNTL 0x00000000, // PA_SC_MODE_CNTL_0 0x00000000, // PA_SC_MODE_CNTL_1 0x00000000, // VGT_ENHANCE - 0x00000100, // VGT_GS_PER_ES - 0x00000080, // VGT_ES_PER_GS - 0x00000002, // VGT_GS_PER_VS - 0x00000000, // VGT_GSVS_RING_OFFSET_1 - 0x00000000, // VGT_GSVS_RING_OFFSET_2 - 0x00000000, // VGT_GSVS_RING_OFFSET_3 - 0x00000000, // VGT_GS_OUT_PRIM_TYPE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // IA_ENHANCE }; static const unsigned int gfx11_SECT_CONTEXT_def_5[] = @@ -695,37 +695,36 @@ static const unsigned int gfx11_SECT_CONTEXT_def_6[] = }; static const unsigned int gfx11_SECT_CONTEXT_def_7[] = { - 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN 0x00000000, // VGT_DRAW_PAYLOAD_CNTL 0, // HOLE - 0x00000000, // VGT_INSTANCE_STEP_RATE_0 - 0x00000000, // VGT_INSTANCE_STEP_RATE_1 - 0x000000ff, // IA_MULTI_VGT_PARAM + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_ESGS_RING_ITEMSIZE - 0x00000000, // VGT_GSVS_RING_ITEMSIZE + 0, // HOLE 0x00000000, // VGT_REUSE_OFF - 
0x00000000, // VGT_VTX_CNT_EN + 0, // HOLE 0x00000000, // DB_HTILE_SURFACE 0x00000000, // DB_SRESULTS_COMPARE_STATE0 0x00000000, // DB_SRESULTS_COMPARE_STATE1 0x00000000, // DB_PRELOAD_CONTROL 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0 - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1 - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2 - 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3 - 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3 0, // HOLE - 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0, // HOLE 0, // HOLE 0, // HOLE @@ -745,10 +744,10 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] = 0x00000000, // VGT_TESS_DISTRIBUTION 0x00000000, // VGT_SHADER_STAGES_EN 0x00000000, // VGT_LS_HS_CONFIG - 0x00000000, // VGT_GS_VERT_ITEMSIZE - 0x00000000, // VGT_GS_VERT_ITEMSIZE_1 - 0x00000000, // VGT_GS_VERT_ITEMSIZE_2 - 0x00000000, // VGT_GS_VERT_ITEMSIZE_3 + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // VGT_TF_PARAM 0x00000000, // DB_ALPHA_TO_MASK 0, // HOLE @@ -759,11 +758,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] = 0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE 0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET 0x00000000, // VGT_GS_INSTANCE_CNT - 0x00000000, // VGT_STRMOUT_CONFIG - 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG -}; -static const unsigned int gfx11_SECT_CONTEXT_def_8[] = -{ + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // PA_SC_CENTROID_PRIORITY_0 0x00000000, // PA_SC_CENTROID_PRIORITY_1 0x00001000, // PA_SC_LINE_CNTL @@ -797,126 +807,126 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] = 0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL 0x00000000, // PA_SC_NGG_MODE_CNTL 0x00000000, // PA_SC_BINNER_CNTL_2 - 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL - 0x00000020, // VGT_OUT_DEALLOC_CNTL + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_BASE - 0x00000000, // CB_COLOR0_PITCH - 0x00000000, // CB_COLOR0_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_VIEW 0x00000000, // CB_COLOR0_INFO 0x00000000, // CB_COLOR0_ATTRIB - 0x00000000, // CB_COLOR0_DCC_CONTROL - 0x00000000, // CB_COLOR0_CMASK - 0x00000000, // CB_COLOR0_CMASK_SLICE - 0x00000000, // CB_COLOR0_FMASK - 0x00000000, // CB_COLOR0_FMASK_SLICE - 0x00000000, // CB_COLOR0_CLEAR_WORD0 - 0x00000000, // CB_COLOR0_CLEAR_WORD1 + 0x00000000, // CB_COLOR0_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR1_BASE - 0x00000000, // CB_COLOR1_PITCH - 0x00000000, // CB_COLOR1_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR1_VIEW 0x00000000, // CB_COLOR1_INFO 0x00000000, // CB_COLOR1_ATTRIB - 0x00000000, // CB_COLOR1_DCC_CONTROL - 0x00000000, // CB_COLOR1_CMASK - 0x00000000, // CB_COLOR1_CMASK_SLICE - 0x00000000, // CB_COLOR1_FMASK - 0x00000000, // CB_COLOR1_FMASK_SLICE - 0x00000000, // CB_COLOR1_CLEAR_WORD0 - 0x00000000, // CB_COLOR1_CLEAR_WORD1 + 0x00000000, // CB_COLOR1_FDCC_CONTROL + 0, 
// HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR1_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR2_BASE - 0x00000000, // CB_COLOR2_PITCH - 0x00000000, // CB_COLOR2_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR2_VIEW 0x00000000, // CB_COLOR2_INFO 0x00000000, // CB_COLOR2_ATTRIB - 0x00000000, // CB_COLOR2_DCC_CONTROL - 0x00000000, // CB_COLOR2_CMASK - 0x00000000, // CB_COLOR2_CMASK_SLICE - 0x00000000, // CB_COLOR2_FMASK - 0x00000000, // CB_COLOR2_FMASK_SLICE - 0x00000000, // CB_COLOR2_CLEAR_WORD0 - 0x00000000, // CB_COLOR2_CLEAR_WORD1 + 0x00000000, // CB_COLOR2_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR2_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR3_BASE - 0x00000000, // CB_COLOR3_PITCH - 0x00000000, // CB_COLOR3_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR3_VIEW 0x00000000, // CB_COLOR3_INFO 0x00000000, // CB_COLOR3_ATTRIB - 0x00000000, // CB_COLOR3_DCC_CONTROL - 0x00000000, // CB_COLOR3_CMASK - 0x00000000, // CB_COLOR3_CMASK_SLICE - 0x00000000, // CB_COLOR3_FMASK - 0x00000000, // CB_COLOR3_FMASK_SLICE - 0x00000000, // CB_COLOR3_CLEAR_WORD0 - 0x00000000, // CB_COLOR3_CLEAR_WORD1 + 0x00000000, // CB_COLOR3_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR3_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR4_BASE - 0x00000000, // CB_COLOR4_PITCH - 0x00000000, // CB_COLOR4_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR4_VIEW 0x00000000, // CB_COLOR4_INFO 0x00000000, // CB_COLOR4_ATTRIB - 0x00000000, // CB_COLOR4_DCC_CONTROL - 0x00000000, // CB_COLOR4_CMASK - 0x00000000, // CB_COLOR4_CMASK_SLICE - 0x00000000, // CB_COLOR4_FMASK - 0x00000000, // CB_COLOR4_FMASK_SLICE - 0x00000000, // CB_COLOR4_CLEAR_WORD0 - 0x00000000, // CB_COLOR4_CLEAR_WORD1 + 0x00000000, // CB_COLOR4_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR4_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR5_BASE - 0x00000000, // CB_COLOR5_PITCH - 0x00000000, // CB_COLOR5_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR5_VIEW 0x00000000, // CB_COLOR5_INFO 0x00000000, // CB_COLOR5_ATTRIB - 0x00000000, // CB_COLOR5_DCC_CONTROL - 0x00000000, // CB_COLOR5_CMASK - 0x00000000, // CB_COLOR5_CMASK_SLICE - 0x00000000, // CB_COLOR5_FMASK - 0x00000000, // CB_COLOR5_FMASK_SLICE - 0x00000000, // CB_COLOR5_CLEAR_WORD0 - 0x00000000, // CB_COLOR5_CLEAR_WORD1 + 0x00000000, // CB_COLOR5_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR5_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR6_BASE - 0x00000000, // CB_COLOR6_PITCH - 0x00000000, // CB_COLOR6_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR6_VIEW 0x00000000, // CB_COLOR6_INFO 0x00000000, // CB_COLOR6_ATTRIB - 0x00000000, // CB_COLOR6_DCC_CONTROL - 0x00000000, // CB_COLOR6_CMASK - 0x00000000, // CB_COLOR6_CMASK_SLICE - 0x00000000, // CB_COLOR6_FMASK - 0x00000000, // CB_COLOR6_FMASK_SLICE - 0x00000000, // CB_COLOR6_CLEAR_WORD0 - 0x00000000, // CB_COLOR6_CLEAR_WORD1 + 0x00000000, // CB_COLOR6_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR6_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR7_BASE - 0x00000000, // CB_COLOR7_PITCH - 0x00000000, // CB_COLOR7_SLICE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR7_VIEW 0x00000000, // CB_COLOR7_INFO 0x00000000, // CB_COLOR7_ATTRIB - 0x00000000, // 
CB_COLOR7_DCC_CONTROL - 0x00000000, // CB_COLOR7_CMASK - 0x00000000, // CB_COLOR7_CMASK_SLICE - 0x00000000, // CB_COLOR7_FMASK - 0x00000000, // CB_COLOR7_FMASK_SLICE - 0x00000000, // CB_COLOR7_CLEAR_WORD0 - 0x00000000, // CB_COLOR7_CLEAR_WORD1 + 0x00000000, // CB_COLOR7_FDCC_CONTROL + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR7_DCC_BASE 0, // HOLE 0x00000000, // CB_COLOR0_BASE_EXT @@ -927,22 +937,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] = 0x00000000, // CB_COLOR5_BASE_EXT 0x00000000, // CB_COLOR6_BASE_EXT 0x00000000, // CB_COLOR7_BASE_EXT - 0x00000000, // CB_COLOR0_CMASK_BASE_EXT - 0x00000000, // CB_COLOR1_CMASK_BASE_EXT - 0x00000000, // CB_COLOR2_CMASK_BASE_EXT - 0x00000000, // CB_COLOR3_CMASK_BASE_EXT - 0x00000000, // CB_COLOR4_CMASK_BASE_EXT - 0x00000000, // CB_COLOR5_CMASK_BASE_EXT - 0x00000000, // CB_COLOR6_CMASK_BASE_EXT - 0x00000000, // CB_COLOR7_CMASK_BASE_EXT - 0x00000000, // CB_COLOR0_FMASK_BASE_EXT - 0x00000000, // CB_COLOR1_FMASK_BASE_EXT - 0x00000000, // CB_COLOR2_FMASK_BASE_EXT - 0x00000000, // CB_COLOR3_FMASK_BASE_EXT - 0x00000000, // CB_COLOR4_FMASK_BASE_EXT - 0x00000000, // CB_COLOR5_FMASK_BASE_EXT - 0x00000000, // CB_COLOR6_FMASK_BASE_EXT - 0x00000000, // CB_COLOR7_FMASK_BASE_EXT + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE + 0, // HOLE 0x00000000, // CB_COLOR0_DCC_BASE_EXT 0x00000000, // CB_COLOR1_DCC_BASE_EXT 0x00000000, // CB_COLOR2_DCC_BASE_EXT @@ -976,8 +986,7 @@ static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] = {gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 }, {gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 }, {gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 }, - {gfx11_SECT_CONTEXT_def_7, 0x0000a2a5, 66 }, - {gfx11_SECT_CONTEXT_def_8, 0x0000a2f5, 203 }, + {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 }, { 0, 0, 0 } }; static const struct cs_section_def gfx11_cs_data[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 982855e6cf52..b1c44fab074f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 84440741c60b..a22b45c92792 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -2693,7 +2693,11 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - adev_to_drm(adev)->mode_config.prefer_shadow = 1; + if (adev->asic_type == 
CHIP_HAWAII) + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; + else + adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c5f46d264b23..a3cd5c1e8529 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -53,7 +53,7 @@ * 2. Async ring */ #define GFX10_NUM_GFX_RINGS_NV1X 1 -#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1 +#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2 #define GFX10_MEC_HPD_SIZE 2048 #define F32_CE_PROGRAM_RAM_SIZE 65536 @@ -3780,11 +3780,12 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -3793,13 +3794,13 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) } amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) @@ -3975,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); } +static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); +} + static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4152,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (version_major == 2) { if (version_minor >= 1) gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor == 2) + if (version_minor >= 2) 
gfx_v10_0_init_rlc_iram_dram_microcode(adev); + if (version_minor == 4) { + gfx_v10_0_init_tap_delays_microcode(adev); + } } } @@ -4250,6 +4271,47 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); } + + } + + if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + } + + if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); } info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; @@ -4711,6 +4773,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, { struct amdgpu_ring *ring; unsigned int irq_type; + unsigned int hw_prio; ring = &adev->gfx.gfx_ring[ring_id]; @@ -4728,8 +4791,10 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; + hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ? 
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, - AMDGPU_RING_PRIO_DEFAULT, NULL); + hw_prio, NULL); } static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, @@ -6581,6 +6646,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) } } +static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev, + struct v10_gfx_mqd *mqd, + struct amdgpu_mqd_prop *prop) +{ + bool priority = 0; + u32 tmp; + + /* set up default queue priority level + * 0x0 = low priority, 0x1 = high priority + */ + if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) + priority = 1; + + tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); + tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); + mqd->cp_gfx_hqd_queue_priority = tmp; +} + static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, struct amdgpu_mqd_prop *prop) { @@ -6609,11 +6692,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; - /* set up default queue priority level - * 0x0 = low priority, 0x1 = high priority */ - tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY); - tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); - mqd->cp_gfx_hqd_queue_priority = tmp; + /* set up gfx queue priority */ + gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop); /* set up time quantum */ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM); @@ -8506,14 +8586,45 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; + uint64_t wptr_tmp; - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always being used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); + } } } @@ -8538,13 +8649,42 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t 
*wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; + uint64_t wptr_tmp; - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - BUG(); /* only DOORBELL method supported on gfx10 now */ + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx10 now */ + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index a4a6751b1e44..f6b1bb40e503 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -53,9 +53,12 @@ #define GFX11_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 #define regCGTT_WD_CLK_CTRL 0x5086 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 +#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e +#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); @@ -126,6 +129,10 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, uint16_t pasid, uint32_t flush_type, bool all_hub, uint8_t dst_sel); +static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev); +static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev, + bool enable); static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -1134,6 +1141,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { .read_wave_vgprs = &gfx_v11_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v11_0_select_me_pipe_q, .init_spm_golden = &gfx_v11_0_init_spm_golden_registers, + .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk, }; static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) @@ -2763,7 +2771,13 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) for (i = 0; i < adev->usec_timeout; i++) { cp_status = RREG32_SOC15(GC, 0, regCP_STAT); - bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); + + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1)) + bootload_status = RREG32_SOC15(GC, 0, + regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); + else + bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS); + if ((cp_status == 0) && (REG_GET_FIELD(bootload_status, RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) { @@ -4563,6 
+4577,9 @@ static int gfx_v11_0_hw_init(void *handle) if (adev->gfx.imu.funcs->start_imu) adev->gfx.imu.funcs->start_imu(adev); } + + /* disable gpa mode in backdoor loading */ + gfx_v11_0_disable_gpa_mode(adev); } } @@ -4740,65 +4757,143 @@ static int gfx_v11_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0; u32 tmp; + int i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* GRBM_STATUS */ - tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS); - if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | - GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | - GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK | - GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK | - GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) { - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_CP, - 1); - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_GFX, - 1); - } - - if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, SOFT_RESET_CP, - 1); - } - - /* GRBM_STATUS2 */ - tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2); - if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) - grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, - GRBM_SOFT_RESET, - SOFT_RESET_RLC, - 1); - - if (grbm_soft_reset) { - /* stop the rlc */ - gfx_v11_0_rlc_stop(adev); - - /* Disable GFX parsing/prefetching */ - gfx_v11_0_cp_gfx_enable(adev, false); - - /* Disable MEC parsing/prefetching */ - gfx_v11_0_cp_compute_enable(adev, false); + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0); + WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); - if (grbm_soft_reset) { - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + gfx_v11_0_set_safe_mode(adev); - udelay(50); + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); + WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + } + } + } + for (i = 0; i < adev->gfx.me.num_me; ++i) { + for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { + tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); + tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); + WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); + } } + } + + WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); + + // Read CP_VMID_RESET register three times. 
+ // to get sufficient time for GFX_HQD_ACTIVE reach 0 + RREG32_SOC15(GC, 0, regCP_VMID_RESET); + RREG32_SOC15(GC, 0, regCP_VMID_RESET); + RREG32_SOC15(GC, 0, regCP_VMID_RESET); - /* Wait a little for things to settle down */ - udelay(50); + for (i = 0; i < adev->usec_timeout; i++) { + if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && + !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) + break; + udelay(1); } - return 0; + if (i >= adev->usec_timeout) { + printk("Failed to wait all pipes clean\n"); + return -EINVAL; + } + + /********** trigger soft reset ***********/ + grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CP, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_GFX, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPF, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPC, 1); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPG, 1); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); + /********** exit soft reset ***********/ + grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CP, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_GFX, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPF, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPC, 0); + grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, + SOFT_RESET_CPG, 0); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset); + + tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL); + tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1); + WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp); + + WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0); + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0); + + for (i = 0; i < adev->usec_timeout; i++) { + if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + printk("Failed to wait CP_VMID_RESET to 0\n"); + return -EINVAL; + } + + tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp); + + gfx_v11_0_unset_safe_mode(adev); + + return gfx_v11_0_cp_resume(adev); +} + +static bool gfx_v11_0_check_soft_reset(void *handle) +{ + int i, r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + long tmo = msecs_to_jiffies(1000); + + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + return false; } static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev) @@ -5090,9 +5185,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, 
regSDMA1_RLC_CGCG_CTRL); - data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } else { /* Program RLC_CGCG_CGLS_CTRL */ def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); @@ -5121,9 +5219,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ + if (adev->sdma.num_instances > 1) { + data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); + data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; + WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); + } } } @@ -5188,6 +5289,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { .update_spm_vmid = gfx_v11_0_update_spm_vmid, }; +static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + + WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); + + // Program RLC_PG_DELAY3 for CGPG hysteresis + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 1): + WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); + break; + default: + break; + } + } +} + +static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + gfx_v11_cntl_power_gating(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + static int gfx_v11_0_set_powergating_state(void *handle, enum amd_powergating_state state) { @@ -5202,6 +5335,10 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 2): amdgpu_gfx_off_ctrl(adev, enable); break; + case IP_VERSION(11, 0, 1): + gfx_v11_cntl_pg(adev, enable); + amdgpu_gfx_off_ctrl(adev, enable); + break; default: break; } @@ -5219,6 +5356,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -5296,14 +5434,45 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; + uint64_t wptr_tmp; - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + 
aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always being used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); + } } } @@ -5328,13 +5497,42 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; + uint64_t wptr_tmp; - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + + wptr_tmp = ring->wptr & ring->buf_mask; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); + *wptr_saved = wptr_tmp; + /* assume doorbell always used by mes mapped queue */ + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, wptr_tmp); + WDOORBELL64(ring->doorbell_index, wptr_tmp); + } else { + WDOORBELL64(ring->doorbell_index, wptr_tmp); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, wptr_tmp); + } } else { - BUG(); /* only DOORBELL method supported on gfx11 now */ + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); + } else { + BUG(); /* only DOORBELL method supported on gfx11 now */ + } } } @@ -6131,6 +6329,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .is_idle = gfx_v11_0_is_idle, .wait_for_idle = gfx_v11_0_wait_for_idle, .soft_reset = gfx_v11_0_soft_reset, + .check_soft_reset = gfx_v11_0_check_soft_reset, .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, @@ -6293,6 +6492,11 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev) { + if (adev->flags & AMD_IS_APU) + adev->gfx.imu.mode = MISSION_MODE; + else + adev->gfx.imu.mode = DEBUG_MODE; + adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5349ca4d19e3..fc9c1043244c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -2587,7 +2587,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) gfx_v9_0_tiling_mode_table_init(adev); - gfx_v9_0_setup_rb(adev); + if (adev->gfx.num_gfx_rings) + gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index 5eccaa2c7ca0..0e13370c2057 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -26,13 +26,10 @@ #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" +#include "gc/gc_11_0_0_default.h" #include "navi10_enum.h" #include "soc15_common.h" -#define regGCVM_L2_CNTL3_DEFAULT 0x80100007 -#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1 -#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0 - static const char *gfxhub_client_ids[] = { "CB/DB", "Reserved", @@ -414,12 +411,39 @@ static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev, { u32 tmp; + /* NO halt CP when page fault */ + tmp = RREG32_SOC15(GC, 0, regCP_DEBUG); + tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1); + WREG32_SOC15(GC, 0, regCP_DEBUG, tmp); + + /** + * Set GRBM_GFX_INDEX in broad cast mode + * before programming GL1C_UTCL0_CNTL1 and SQG_CONFIG + */ + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, regGRBM_GFX_INDEX_DEFAULT); + + /** + * Retry respond mode: RETRY + * Error (no retry) respond mode: SUCCESS + */ + tmp = RREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1); + tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_MODE, 0); + tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_FAULT_MODE, 0x2); + WREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1, tmp); + /* These registers are not accessible to VF-SRIOV. * The PF will program them instead. 
*/ if (amdgpu_sriov_vf(adev)) return; + /* Disable SQ XNACK interrupt for all VMIDs */ + tmp = RREG32_SOC15(GC, 0, regSQG_CONFIG); + tmp = REG_SET_FIELD(tmp, SQG_CONFIG, XNACK_INTR_MASK, + SQG_CONFIG__XNACK_INTR_MASK_MASK >> + SQG_CONFIG__XNACK_INTR_MASK__SHIFT); + WREG32_SOC15(GC, 0, regSQG_CONFIG, tmp); + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 9077dfccaf3c..f513e2c2e964 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -22,6 +22,9 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v10_0.h" @@ -416,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -434,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; @@ -456,7 +460,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, flush_type); } - break; + if (!adev->enable_mes) + break; } } @@ -833,10 +838,21 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.visible_vram_size = adev->gmc.real_vram_size; /* set the gart size */ - if (amdgpu_gart_size == -1) - adev->gmc.gart_size = 512ULL << 20; - else + if (amdgpu_gart_size == -1) { + switch (adev->ip_versions[GC_HWIP][0]) { + default: + adev->gmc.gart_size = 512ULL << 20; + break; + case IP_VERSION(10, 3, 1): /* DCE SG support */ + case IP_VERSION(10, 3, 3): /* DCE SG support */ + case IP_VERSION(10, 3, 6): /* DCE SG support */ + case IP_VERSION(10, 3, 7): /* DCE SG support */ + adev->gmc.gart_size = 1024ULL << 20; + break; + } + } else { adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; + } gmc_v10_0_vram_gtt_location(adev, &adev->gmc); @@ -968,6 +984,8 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v10_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 7f4b480ae66e..1471bfb9ae38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -22,10 +22,13 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" -#include "umc_v8_7.h" +#include "umc_v8_10.h" #include "athub/athub_3_0_0_sh_mask.h" #include "athub/athub_3_0_0_offset.h" #include "oss/osssys_6_0_0_offset.h" @@ -37,6 +40,7 @@ #include "nbio_v4_3.h" #include "gfxhub_v3_0.h" #include "mmhub_v3_0.h" +#include "mmhub_v3_0_1.h" #include "mmhub_v3_0_2.h" #include "athub_v3_0.h" @@ -267,7 +271,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* For SRIOV run time, driver shouldn't access the register through 
MMIO * Directly use kiq to do the vm invalidation instead */ - if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes && + if ((adev->gfx.kiq.ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -343,7 +347,6 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, flush_type); } - break; } } @@ -537,17 +540,45 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[UMC_HWIP][0]) { case IP_VERSION(8, 10, 0): + adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; + adev->umc.node_inst_num = adev->gmc.num_umc; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; + adev->umc.ras = &umc_v8_10_ras; + break; case IP_VERSION(8, 11, 0): break; default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not define special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + } } static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[MMHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + adev->mmhub.funcs = &mmhub_v3_0_1_funcs; + break; case IP_VERSION(3, 0, 2): adev->mmhub.funcs = &mmhub_v3_0_2_funcs; break; @@ -747,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v11_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 22761a3bb818..4603653916f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); up_read(&adev->reset_domain->sem); @@ -1624,12 +1625,15 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): adev->num_vmhubs = 3; /* Keep the vm size same with Vega20 */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index 39a696cd45b5..29c3484ae1f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, 0); } +static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, forced on MEM clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. 
The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable MEM clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= 
AMD_CG_SUPPORT_HDP_SD; +} + +static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + hdp_v5_2_update_mem_power_gating(adev, enable); + hdp_v5_2_update_medium_grain_clock_gating(adev, enable); +} + const struct amdgpu_hdp_funcs hdp_v5_2_funcs = { .flush_hdp = hdp_v5_2_flush_hdp, + .update_clock_gating = hdp_v5_2_update_clock_gating, + .get_clock_gating_state = hdp_v5_2_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 92dc60a9d209..085e613f3646 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { static const struct amdgpu_ih_funcs ih_v6_0_funcs = { .get_wptr = ih_v6_0_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = ih_v6_0_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index d63d3f2b8a16..76383baa3929 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include "amdgpu.h" #include "amdgpu_imu.h" +#include "amdgpu_dpm.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" @@ -117,6 +118,25 @@ static int imu_v11_0_load_microcode(struct amdgpu_device *adev) return 0; } +static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev) +{ + int i, imu_reg_val = 0; + + for (i = 0; i < adev->usec_timeout; i++) { + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL); + if ((imu_reg_val & 0x1f) == 0x1f) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "init imu: IMU start timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + static void imu_v11_0_setup(struct amdgpu_device *adev) { int imu_reg_val; @@ -125,9 +145,11 @@ static void imu_v11_0_setup(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff); WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff); - imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16); - imu_reg_val |= 0x1; - WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); + if (adev->gfx.imu.mode == DEBUG_MODE) { + imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16); + imu_reg_val |= 0x1; + WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val); + } //disble imu Rtavfs, SmsRepair, DfllBTC, and ClkB imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10); @@ -137,26 +159,17 @@ static void imu_v11_0_setup(struct amdgpu_device *adev) static int imu_v11_0_start(struct amdgpu_device *adev) { - int imu_reg_val, i; + int imu_reg_val; //Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0 imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL); imu_reg_val &= 0xfffffffe; WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val); - for (i = 0; i < adev->usec_timeout; i++) { - imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL); - if ((imu_reg_val & 0x1f) == 0x1f) - break; - udelay(1); - } + if (adev->flags & AMD_IS_APU) + amdgpu_dpm_set_gfx_power_up_by_imu(adev); - if (i >= adev->usec_timeout) { - dev_err(adev->dev, "init imu: IMU start timeout\n"); - return -ETIMEDOUT; - } - - return 0; + return imu_v11_0_wait_for_reset_status(adev); } static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] = @@ -364,4 +377,5 @@ const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = { .setup_imu = imu_v11_0_setup, .start_imu = imu_v11_0_start, .program_rlc_ram = 
imu_v11_0_program_rlc_ram, + .wait_for_reset_status = imu_v11_0_wait_for_reset_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 18a129f36215..067d10073a56 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -87,21 +87,32 @@ static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = { }; static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size) + void *pkt, int size, + int api_status_off) { int ndw = size / 4; signed long r; union MESAPI__ADD_QUEUE *x_pkt = pkt; + struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; + unsigned long flags; BUG_ON(size % 4 != 0); - if (amdgpu_ring_alloc(ring, ndw)) + spin_lock_irqsave(&mes->ring_lock, flags); + if (amdgpu_ring_alloc(ring, ndw)) { + spin_unlock_irqrestore(&mes->ring_lock, flags); return -ENOMEM; + } + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; + api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&mes->ring_lock, flags); DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); @@ -166,13 +177,9 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gws_size = input->gws_size; mes_add_queue_pkt.trap_handler_addr = input->tba_addr; - mes_add_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_add_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_add_queue_pkt, sizeof(mes_add_queue_pkt)); + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); } static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, @@ -189,13 +196,9 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, @@ -227,13 +230,9 @@ static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; } - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes, @@ -258,13 +257,9 @@ static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - 
mes_status_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_status_pkt, sizeof(mes_status_pkt)); + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) @@ -299,7 +294,7 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->agreegated_doorbells[i]; + mes->aggregated_doorbells[i]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -313,13 +308,63 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; - mes_set_hw_res_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_set_hw_res_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v10_1_submit_pkt_and_poll_completion(mes, - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt)); + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); +} + +static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data; + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1); + data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << + CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2); + data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << + CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3); + data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << + CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4); + data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << + CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data); + + data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5); + data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); + data |= 
mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << + CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data); + + data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; + WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data); } static const struct amdgpu_mes_funcs mes_v10_1_funcs = { @@ -1121,6 +1166,8 @@ static int mes_v10_1_hw_init(void *handle) if (r) goto failure; + mes_v10_1_init_aggregated_doorbell(&adev->mes); + r = mes_v10_1_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); @@ -1133,6 +1180,7 @@ static int mes_v10_1_hw_init(void *handle) * with MES enabled. */ adev->gfx.kiq.ring.sched.ready = false; + adev->mes.ring.sched.ready = true; return 0; @@ -1145,6 +1193,8 @@ static int mes_v10_1_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->mes.ring.sched.ready = false; + mes_v10_1_enable(adev, false); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1183,7 +1233,8 @@ static int mes_v10_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_mes_self_test(adev); + if (!amdgpu_in_reset(adev)) + amdgpu_mes_self_test(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 7eee004cf3ce..cc3fdbbcd314 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -86,21 +86,32 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { }; static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size) + void *pkt, int size, + int api_status_off) { int ndw = size / 4; signed long r; union MESAPI__ADD_QUEUE *x_pkt = pkt; + struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; + unsigned long flags; BUG_ON(size % 4 != 0); - if (amdgpu_ring_alloc(ring, ndw)) + spin_lock_irqsave(&mes->ring_lock, flags); + if (amdgpu_ring_alloc(ring, ndw)) { + spin_unlock_irqrestore(&mes->ring_lock, flags); return -ENOMEM; + } + + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); + api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; + api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&mes->ring_lock, flags); DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); @@ -156,7 +167,13 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, input->gang_global_priority_level; mes_add_queue_pkt.doorbell_offset = input->doorbell_offset; mes_add_queue_pkt.mqd_addr = input->mqd_addr; - mes_add_queue_pkt.wptr_addr = input->wptr_addr; + + if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> + AMDGPU_MES_API_VERSION_SHIFT) >= 2) + mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr; + else + mes_add_queue_pkt.wptr_addr = input->wptr_addr; + mes_add_queue_pkt.queue_type = convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.paging = input->paging; @@ -165,14 +182,12 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gws_size = input->gws_size; mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; - - mes_add_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - 
mes_add_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; + mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; + mes_add_queue_pkt.trap_en = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_add_queue_pkt, sizeof(mes_add_queue_pkt)); + &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), + offsetof(union MESAPI__ADD_QUEUE, api_status)); } static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, @@ -189,13 +204,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, @@ -209,7 +220,7 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE; mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset << 2; + mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset; mes_remove_queue_pkt.gang_context_addr = 0; mes_remove_queue_pkt.pipe_id = input->pipe_id; @@ -221,19 +232,14 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.tf_data = lower_32_bits(input->trail_fence_data); } else { - if (input->queue_type == AMDGPU_RING_TYPE_GFX) - mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1; - else - mes_remove_queue_pkt.unmap_kiq_utility_queue = 1; + mes_remove_queue_pkt.unmap_legacy_queue = 1; + mes_remove_queue_pkt.queue_type = + convert_to_mes_queue_type(input->queue_type); } - mes_remove_queue_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_remove_queue_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; - return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt)); + &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), + offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes, @@ -258,13 +264,57 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_status_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_status_pkt, sizeof(mes_status_pkt), + offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); +} + +static int mes_v11_0_misc_op(struct amdgpu_mes *mes, + struct mes_misc_op_input *input) +{ + union MESAPI__MISC misc_pkt; + + memset(&misc_pkt, 0, sizeof(misc_pkt)); + + misc_pkt.header.type = MES_API_TYPE_SCHEDULER; + misc_pkt.header.opcode = MES_SCH_API_MISC; + misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + switch (input->op) { + case MES_MISC_OP_READ_REG: + misc_pkt.opcode = MESAPI_MISC__READ_REG; + misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset; + 
misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr; + break; + case MES_MISC_OP_WRITE_REG: + misc_pkt.opcode = MESAPI_MISC__WRITE_REG; + misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset; + misc_pkt.write_reg.reg_value = input->write_reg.reg_value; + break; + case MES_MISC_OP_WRM_REG_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = 0; + break; + case MES_MISC_OP_WRM_REG_WR_WAIT: + misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM; + misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG; + misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref; + misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask; + misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0; + misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1; + break; + default: + DRM_ERROR("unsupported misc op (%d) \n", input->op); + return -EINVAL; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_status_pkt, sizeof(mes_status_pkt)); + &misc_pkt, sizeof(misc_pkt), + offsetof(union MESAPI__MISC, api_status)); } static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) @@ -299,7 +349,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->agreegated_doorbells[i]; + mes->aggregated_doorbells[i]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -312,14 +362,65 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_reset = 1; mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; - - mes_set_hw_res_pkt.api_status.api_completion_fence_addr = - mes->ring.fence_drv.gpu_addr; - mes_set_hw_res_pkt.api_status.api_completion_fence_value = - ++mes->ring.fence_drv.sync_seq; + mes_set_hw_res_pkt.oversubscription_timer = 50; return mes_v11_0_submit_pkt_and_poll_completion(mes, - &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt)); + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); +} + +static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t data; + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1); + data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << + CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2); + data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << + CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3); + data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | 
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << + CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4); + data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << + CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data); + + data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5); + data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | + CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); + data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << + CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; + data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data); + + data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; + WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); } static const struct amdgpu_mes_funcs mes_v11_0_funcs = { @@ -328,6 +429,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .unmap_legacy_queue = mes_v11_0_unmap_legacy_queue, .suspend_gang = mes_v11_0_suspend_gang, .resume_gang = mes_v11_0_resume_gang, + .misc_op = mes_v11_0_misc_op, }; static int mes_v11_0_init_microcode(struct amdgpu_device *adev, @@ -858,6 +960,18 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, mes_v11_0_queue_init_register(ring); } + /* get MES scheduler/KIQ versions */ + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, 3, pipe, 0, 0); + + if (pipe == AMDGPU_MES_SCHED_PIPE) + adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq) + adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO); + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + return 0; } @@ -1108,6 +1222,8 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; + mes_v11_0_init_aggregated_doorbell(&adev->mes); + r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); @@ -1120,6 +1236,7 @@ static int mes_v11_0_hw_init(void *handle) * with MES enabled. 
*/ adev->gfx.kiq.ring.sched.ready = false; + adev->mes.ring.sched.ready = true; return 0; @@ -1130,6 +1247,9 @@ failure: static int mes_v11_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mes.ring.sched.ready = false; return 0; } @@ -1161,7 +1281,8 @@ static int mes_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_mes_self_test(adev); + if (!amdgpu_in_reset(adev)) + amdgpu_mes_self_test(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 3f44a099c52a..3e51e773f92b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); + tmp = mmVM_L2_CNTL3_DEFAULT; if (adev->gmc.translate_further) { tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c new file mode 100644 index 000000000000..e8058edc1d10 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -0,0 +1,591 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "mmhub_v3_0_1.h" + +#include "mmhub/mmhub_3_0_1_offset.h" +#include "mmhub/mmhub_3_0_1_sh_mask.h" +#include "navi10_enum.h" + +#include "soc15_common.h" + +#define regMMVM_L2_CNTL3_DEFAULT 0x80100007 +#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0 + +static const char *mmhub_client_ids_v3_0_1[][2] = { + [0][0] = "VMC", + [4][0] = "DCEDMC", + [5][0] = "DCEVGA", + [6][0] = "MP0", + [7][0] = "MP1", + [8][0] = "MPIO", + [16][0] = "HDP", + [17][0] = "LSDMA", + [18][0] = "JPEG", + [19][0] = "VCNU0", + [21][0] = "VSCH", + [22][0] = "VCNU1", + [23][0] = "VCN1", + [32+20][0] = "VCN0", + [2][1] = "DBGUNBIO", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [5][1] = "DCEVGA", + [6][1] = "MP0", + [7][1] = "MP1", + [8][1] = "MPIO", + [10][1] = "DBGU0", + [11][1] = "DBGU1", + [12][1] = "DBGU2", + [13][1] = "DBGU3", + [14][1] = "XDP", + [15][1] = "OSSSYS", + [16][1] = "HDP", + [17][1] = "LSDMA", + [18][1] = "JPEG", + [19][1] = "VCNU0", + [20][1] = "VCN0", + [21][1] = "VSCH", + [22][1] = "VCNU1", + [23][1] = "VCN1", +}; + +static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + uint32_t cid, rw; + const char *mmhub_cid = NULL; + + cid = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, CID); + rw = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, RW); + + dev_err(adev->dev, + "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + + switch (adev->ip_versions[MMHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; + break; + default: + mmhub_cid = NULL; + break; + } + + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + mmhub_cid ? 
mmhub_cid : "unknown", cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%x\n", rw); +} + +static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); +} + +static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + /* + * the new L1 policy will block SRIOV guest from writing + * these regs, and they will be programed at host. + * so skip programing these regs. + */ + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->gmc.vram_start >> 18); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->gmc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp); + + tmp = regMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp); + + tmp = regMMVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp); + + tmp = regMMVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); +} + +static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp); +} + +static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev) 
+{ + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} + +static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; +} + +static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev) +{ + /* GART Enable. 
*/ + mmhub_v3_0_1_init_gart_aperture_regs(adev); + mmhub_v3_0_1_init_system_aperture_regs(adev); + mmhub_v3_0_1_init_tlb_regs(adev); + mmhub_v3_0_1_init_cache_regs(adev); + + mmhub_v3_0_1_enable_system_domain(adev); + mmhub_v3_0_1_disable_identity_aperture(adev); + mmhub_v3_0_1_setup_vmid_config(adev); + mmhub_v3_0_1_program_invalidation(adev); + + return 0; +} + +static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = { + .print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status, + .get_invalidate_req = mmhub_v3_0_1_get_invalidate_req, +}; + +static void mmhub_v3_0_1_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + 
hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ - + regMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs; +} + +static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev) +{ + u64 base; + + base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE); + base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24; +} + +static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); +} + +static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); +} + +static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + mmhub_v3_0_1_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + mmhub_v3_0_1_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + return 0; +} + +static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} + +const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = { + .init = mmhub_v3_0_1_init, + .get_fb_location = mmhub_v3_0_1_get_fb_location, + .get_mc_fb_offset 
= mmhub_v3_0_1_get_mc_fb_offset, + .gart_enable = mmhub_v3_0_1_gart_enable, + .set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default, + .gart_disable = mmhub_v3_0_1_gart_disable, + .set_clockgating = mmhub_v3_0_1_set_clockgating, + .get_clockgating = mmhub_v3_0_1_get_clockgating, + .setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h new file mode 100644 index 000000000000..4c1246735e7d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h @@ -0,0 +1,28 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __MMHUB_V3_0_1_H__ +#define __MMHUB_V3_0_1_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 6e0145b2b408..445cb06b9d26 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + unsigned int num_level, block_size; uint32_t tmp; int i; + num_level = adev->vm_manager.num_level; + block_size = adev->vm_manager.block_size; + if (adev->gmc.translate_further) + num_level -= 1; + else + block_size -= 9; + for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); @@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, - adev->vm_manager.num_level); + num_level); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, @@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, - adev->vm_manager.block_size - 9); + block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index b81acf59870c..12906ba74462 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -283,8 +283,16 @@ flr_done: /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) && (!amdgpu_device_has_job_running(adev) || - adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover_imp(adev, NULL); + adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 22c10b97ea81..e07757eea7ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -310,8 +310,16 @@ flr_done: adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT || adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || - adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover_imp(adev, NULL); + adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 7b63d30b9b79..288c414babdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -522,8 +522,16 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - if (amdgpu_device_should_recover_gpu(adev)) - amdgpu_device_gpu_recover_imp(adev, NULL); + if (amdgpu_device_should_recover_gpu(adev)) { + struct amdgpu_reset_context reset_context; + memset(&reset_context, 0, sizeof(reset_context)); + + reset_context.method = AMD_RESET_METHOD_NONE; + reset_context.reset_req_dev = adev; + clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + amdgpu_device_gpu_recover(adev, NULL, &reset_context); + } } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 4b5396d3e60f..eec13cb5bf75 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. 
*/ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 6cd1fb2eb913..b465baa26762 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { - .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; @@ -547,7 +526,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev) { uint32_t reg, reg_data; - if (adev->asic_type != CHIP_SIENNA_CICHLID) + if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0)) return; reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 6074dd3a1ed8..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index ed31d133f07a..982a89f841d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -240,8 +240,11 @@ static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) { + if (enable) { data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | @@ -266,9 +269,12 @@ static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + /* TODO: 
need update in future */ def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + if (enable) { data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK; } else { data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK; @@ -344,6 +350,121 @@ static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev) return rom_offset; } +#ifdef CONFIG_PCIEASPM +static void nbio_v4_3_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data = 0x35EB; + data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2); + data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + if (adev->pdev->ltr_path) + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + else + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} +#endif + +static void nbio_v4_3_program_aspm(struct amdgpu_device *adev) +{ +#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) && + !(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0))) + return; + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + + def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4); + data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL); + data |= 
PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data); + + nbio_v4_3_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL); + data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data); +#endif +} + const struct amdgpu_nbio_funcs nbio_v4_3_funcs = { .get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset, @@ -365,4 +486,5 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = { .init_registers = nbio_v4_3_init_registers, .remap_hdp_registers = nbio_v4_3_remap_hdp_registers, .get_rom_offset = nbio_v4_3_get_rom_offset, + .program_aspm = nbio_v4_3_program_aspm, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4531761dcf77..11848d1e238b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { - .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index 7490022d79d4..f27c41728822 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; 
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern struct amdgpu_nbio_ras nbio_v7_4_ras; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index cdc0c9779848..1dc95ef21da6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -58,11 +58,17 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE); + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE); u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_CSDMA_DOORBELL_RANGE, + OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_CSDMA_DOORBELL_RANGE, + SIZE, doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, GDC0_BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); doorbell_range = REG_SET_FIELD(doorbell_range, @@ -77,6 +83,26 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan WREG32_PCIE_PORT(reg, doorbell_range); } +static void nbio_v7_7_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8); + } else { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0); + } + + WREG32_PCIE_PORT(reg, doorbell_range); +} + static void nbio_v7_7_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { @@ -221,6 +247,81 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev) } +static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (enable) { + data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data); +} + +static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (enable) + data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= 
(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); + if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); + if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset, @@ -232,9 +333,13 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { .mc_access_enable = nbio_v7_7_mc_access_enable, .get_memsize = nbio_v7_7_get_memsize, .sdma_doorbell_range = nbio_v7_7_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_7_vcn_doorbell_range, .enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_7_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_7_get_clockgating_state, .ih_control = nbio_v7_7_ih_control, .init_registers = nbio_v7_7_init_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 236b7a61443a..22c775f39119 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -259,6 +259,8 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_CAP = 62, /* CAP_FW */ + GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */ + GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index a2588200ea58..0b2ac418e4ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) adev->psp.dtm_context.context.bin_desc.start_addr = (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + if (adev->apu_flags & AMD_APU_IS_RENOIR) { + adev->psp.securedisplay_context.context.bin_desc.fw_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay_context.context.bin_desc.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay_context.context.bin_desc.start_addr = + (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 9e1ef81933ff..a75a286e1ecf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -20,6 +20,8 @@ * OTHER 
DEALINGS IN THE SOFTWARE. * */ +#include <drm/drm_drv.h> +#include <linux/vmalloc.h> #include "amdgpu.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" @@ -39,7 +41,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -56,6 +60,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); #define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 #define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 +/* memory training timeout define */ +#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 + static int psp_v13_0_init_microcode(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -105,6 +112,10 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) err = psp_init_sos_microcode(psp, chip_name); if (err) return err; + /* It's not necessary to load ras ta on Guest side */ + err = psp_init_ta_microcode(psp, chip_name); + if (err) + return err; break; default: BUG(); @@ -413,6 +424,159 @@ static void psp_v13_0_ring_set_wptr(struct psp_context *psp, uint32_t value) WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value); } +static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg) +{ + int ret; + int i; + uint32_t data_32; + int max_wait; + struct amdgpu_device *adev = psp->adev; + + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, data_32); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, msg); + + max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; + for (i = 0; i < max_wait; i++) { + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret == 0) + break; + } + if (i < max_wait) + ret = 0; + else + ret = -ETIME; + + dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n", + (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", + (ret == 0) ? 
"succeed" : "failed", + i, adev->usec_timeout/1000); + return ret; +} + + +static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + uint32_t *pcache = (uint32_t *)ctx->sys_cache; + struct amdgpu_device *adev = psp->adev; + uint32_t p2c_header[4]; + uint32_t sz; + void *buf; + int ret, idx; + + if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + dev_dbg(adev->dev, "Memory training is not supported.\n"); + return 0; + } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { + dev_err(adev->dev, "Memory training initialization failure.\n"); + return -EINVAL; + } + + if (psp_v13_0_is_sos_alive(psp)) { + dev_dbg(adev->dev, "SOS is alive, skip memory training.\n"); + return 0; + } + + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", + pcache[0], pcache[1], pcache[2], pcache[3], + p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + dev_dbg(adev->dev, "Short training depends on restore.\n"); + ops |= PSP_MEM_TRAIN_RESTORE; + } + + if ((ops & PSP_MEM_TRAIN_RESTORE) && + pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + pcache[3] == p2c_header[3])) { + dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if ((ops & PSP_MEM_TRAIN_SAVE) && + p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n"); + ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; + } + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; + ops |= PSP_MEM_TRAIN_SAVE; + } + + dev_dbg(adev->dev, "Memory training ops:%x.\n", ops); + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* + * Long training will encroach a certain amount on the bottom of VRAM; + * save the content from the bottom of VRAM to system memory + * before training, and restore it after training to avoid + * VRAM corruption. 
+ */ + sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + dev_err(adev->dev, "failed to allocate system memory.\n"); + return -ENOMEM; + } + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); + ret = psp_v13_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); + if (ret) { + DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); + drm_dev_exit(idx); + return ret; + } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->hdp.funcs->flush_hdp(adev, NULL); + vfree(buf); + drm_dev_exit(idx); + } else { + vfree(buf); + return -ENODEV; + } + } + + if (ops & PSP_MEM_TRAIN_SAVE) { + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); + } + + if (ops & PSP_MEM_TRAIN_RESTORE) { + amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); + } + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + ret = psp_v13_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ? + PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); + if (ret) { + dev_err(adev->dev, "send training msg failed.\n"); + return ret; + } + } + ctx->training_cnt++; + return 0; +} + static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) { struct amdgpu_device *adev = psp->adev; @@ -561,6 +725,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .ring_destroy = psp_v13_0_ring_destroy, .ring_get_wptr = psp_v13_0_ring_get_wptr, .ring_set_wptr = psp_v13_0_ring_set_wptr, + .mem_training = psp_v13_0_memory_training, .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw, .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw, .update_spirom = psp_v13_0_update_spirom, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c new file mode 100644 index 000000000000..321089dfa7db --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c @@ -0,0 +1,387 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v13_0_4.h" + +#include "mp/mp_13_0_4_offset.h" +#include "mp/mp_13_0_4_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/psp_13_0_4_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_4_ta.bin"); + +static int psp_v13_0_4_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char ucode_prefix[30]; + int err = 0; + + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 4): + amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); + chip_name = ucode_prefix; + break; + default: + BUG(); + } + + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(13, 0, 4): + err = psp_init_toc_microcode(psp, chip_name); + if (err) + return err; + err = psp_init_ta_microcode(psp, chip_name); + if (err) + return err; + break; + default: + BUG(); + } + + return 0; +} + +static bool psp_v13_0_4_is_sos_alive(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return sol_reg != 0x0; +} + +static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + int ret; + int retry_loop; + + for (retry_loop = 0; retry_loop < 10; retry_loop++) { + /* Wait for bootloader to signify that is + ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, + 0x80000000, + false); + + if (ret == 0) + return 0; + } + + return ret; +} + +static int psp_v13_0_4_bootloader_load_component(struct psp_context *psp, + struct psp_bin_desc *bin_desc, + enum psp_bootloader_cmd bl_cmd) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check tOS sign of life register to confirm sys driver and sOS + * are already been loaded. 
+ */ + if (psp_v13_0_4_is_sos_alive(psp)) + return 0; + + ret = psp_v13_0_4_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP KDB binary to memory */ + memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes); + + /* Provide the PSP KDB to bootloader */ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = bl_cmd; + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + ret = psp_v13_0_4_wait_for_bootloader(psp); + + return ret; +} + +static int psp_v13_0_4_bootloader_load_kdb(struct psp_context *psp) +{ + return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE); +} + +static int psp_v13_0_4_bootloader_load_spl(struct psp_context *psp) +{ + return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE); +} + +static int psp_v13_0_4_bootloader_load_sysdrv(struct psp_context *psp) +{ + return psp_v13_0_4_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV); +} + +static int psp_v13_0_4_bootloader_load_soc_drv(struct psp_context *psp) +{ + return psp_v13_0_4_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV); +} + +static int psp_v13_0_4_bootloader_load_intf_drv(struct psp_context *psp) +{ + return psp_v13_0_4_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV); +} + +static int psp_v13_0_4_bootloader_load_dbg_drv(struct psp_context *psp) +{ + return psp_v13_0_4_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV); +} + +static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. 
+ */ + if (psp_v13_0_4_is_sos_alive(psp)) + return 0; + + ret = psp_v13_0_4_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static int psp_v13_0_4_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static int psp_v13_0_4_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v13_0_4_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v13_0_4_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v13_0_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + 0x80000000, 
0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v13_0_4_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v13_0_4_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v13_0_4_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67); + + return data; +} + +static void psp_v13_0_4_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value); +} + +static const struct psp_funcs psp_v13_0_4_funcs = { + .init_microcode = psp_v13_0_4_init_microcode, + .bootloader_load_kdb = psp_v13_0_4_bootloader_load_kdb, + .bootloader_load_spl = psp_v13_0_4_bootloader_load_spl, + .bootloader_load_sysdrv = psp_v13_0_4_bootloader_load_sysdrv, + .bootloader_load_soc_drv = psp_v13_0_4_bootloader_load_soc_drv, + .bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv, + .bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv, + .bootloader_load_sos = psp_v13_0_4_bootloader_load_sos, + .ring_init = psp_v13_0_4_ring_init, + .ring_create = psp_v13_0_4_ring_create, + .ring_stop = psp_v13_0_4_ring_stop, + .ring_destroy = psp_v13_0_4_ring_destroy, + .ring_get_wptr = psp_v13_0_4_ring_get_wptr, + .ring_set_wptr = psp_v13_0_4_ring_set_wptr, +}; + +void psp_v13_0_4_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v13_0_4_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h new file mode 100644 index 000000000000..8547b8d514d5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V13_0_4_H__ +#define __PSP_V13_0_4_H__ + +#include "amdgpu_psp.h" + +void psp_v13_0_4_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 1f9021f896a1..a019ac92edb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -389,34 +389,67 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr << 2) == 0x%08x " - "upper_32_bits(ring->wptr << 2) == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + AMDGPU_MES_PRIORITY_LEVEL_NORMAL); + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + *wptr_saved = ring->wptr << 2; + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, + ring->wptr << 2); + } } else { - DRM_DEBUG("Not using doorbell -- " - "mmSDMA%i_GFX_RB_WPTR == 0x%08x " - "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, 
sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "mmSDMA%i_GFX_RB_WPTR == 0x%08x " + "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, + ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 8cfaed55b192..0200cb3a31a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -57,6 +57,7 @@ static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev); static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev); +static int sdma_v6_0_start(struct amdgpu_device *adev); static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) { @@ -245,34 +246,68 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t *wptr_saved; + uint32_t *is_queue_unmap; + uint64_t aggregated_db_index; + uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; DRM_DEBUG("Setting write pointer\n"); - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ + + if (ring->is_mes_queue) { + wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); + is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + + sizeof(uint32_t)); + aggregated_db_index = + amdgpu_mes_get_aggregated_doorbell_index(adev, + ring->hw_prio); + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + *wptr_saved = ring->wptr << 2; + if (*is_queue_unmap) { + WDOORBELL64(aggregated_db_index, ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + + if (*is_queue_unmap) + WDOORBELL64(aggregated_db_index, + ring->wptr << 2); + } } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - 
"regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr << 2); + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } } @@ -771,32 +806,54 @@ static int sdma_v6_0_load_microcode(struct amdgpu_device *adev) static int sdma_v6_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset; u32 tmp; int i; + sdma_v6_0_gfx_stop(adev); + for (i = 0; i < adev->sdma.num_instances; i++) { - grbm_soft_reset = REG_SET_FIELD(0, - GRBM_SOFT_RESET, SOFT_RESET_SDMA0, - 1); - grbm_soft_reset <<= i; + tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE)); + tmp |= SDMA0_FREEZE__FREEZE_MASK; + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp); + tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL)); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK; + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp); - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0); + + udelay(100); + + tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i; WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - udelay(50); + udelay(100); - tmp &= ~grbm_soft_reset; - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0); tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); - udelay(50); + udelay(100); } - return 0; + return sdma_v6_0_start(adev); +} + +static bool sdma_v6_0_check_soft_reset(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + long tmo = msecs_to_jiffies(1000); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + r = amdgpu_ring_test_ib(ring, tmo); + if (r) + return true; + } + + return false; } /** @@ -830,7 +887,6 @@ static int sdma_v6_0_start(struct amdgpu_device *adev) msleep(1000); } - sdma_v6_0_soft_reset(adev); /* unhalt the MEs */ sdma_v6_0_enable(adev, true); /* enable sdma ring preemption */ @@ -1526,6 +1582,7 @@ const struct amd_ip_funcs 
sdma_v6_0_ip_funcs = { .is_idle = sdma_v6_0_is_idle, .wait_for_idle = sdma_v6_0_wait_for_idle, .soft_reset = sdma_v6_0_soft_reset, + .check_soft_reset = sdma_v6_0_check_soft_reset, .set_clockgating_state = sdma_v6_0_set_clockgating_state, .set_powergating_state = sdma_v6_0_set_powergating_state, .get_clockgating_state = sdma_v6_0_get_clockgating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 9e18a2b22607..55284b24f113 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -80,6 +80,7 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, switch (adev->ip_versions[UVD_HWIP][0]) { case IP_VERSION(4, 0, 0): + case IP_VERSION(4, 0, 2): if (encode) *codecs = &vcn_4_0_0_video_codecs_encode; else @@ -310,6 +311,7 @@ static enum amd_reset_method soc21_asic_reset_method(struct amdgpu_device *adev) { if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || amdgpu_reset_method == AMD_RESET_METHOD_BACO) return amdgpu_reset_method; @@ -319,7 +321,10 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): return AMD_RESET_METHOD_MODE1; + case IP_VERSION(13, 0, 4): + return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) return AMD_RESET_METHOD_BACO; @@ -341,6 +346,10 @@ static int soc21_asic_reset(struct amdgpu_device *adev) dev_info(adev->dev, "BACO reset\n"); ret = amdgpu_dpm_baco_reset(adev); break; + case AMD_RESET_METHOD_MODE2: + dev_info(adev->dev, "MODE2 reset\n"); + ret = amdgpu_dpm_mode2_reset(adev); + break; default: dev_info(adev->dev, "MODE1 reset\n"); ret = amdgpu_device_mode1_reset(adev); @@ -379,11 +388,12 @@ static void soc21_pcie_gen3_enable(struct amdgpu_device *adev) static void soc21_program_aspm(struct amdgpu_device *adev) { - - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; - /* todo */ + if (!(adev->flags & AMD_IS_APU) && + (adev->nbio.funcs->program_aspm)) + adev->nbio.funcs->program_aspm(adev); } static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev, @@ -409,7 +419,13 @@ static uint32_t soc21_get_rev_id(struct amdgpu_device *adev) static bool soc21_need_full_reset(struct amdgpu_device *adev) { - return true; + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 2): + return false; + default: + return true; + } } static bool soc21_need_reset_on_init(struct amdgpu_device *adev) @@ -478,6 +494,20 @@ static void soc21_pre_asic_init(struct amdgpu_device *adev) { } +static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, + bool enter) +{ + if (enter) + amdgpu_gfx_rlc_enter_safe_mode(adev); + else + amdgpu_gfx_rlc_exit_safe_mode(adev); + + if (adev->gfx.funcs->update_perfmon_mgcg) + adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); + + return 0; +} + static const struct amdgpu_asic_funcs soc21_asic_funcs = { .read_disabled_bios = &soc21_read_disabled_bios, @@ -497,6 +527,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = .supports_baco = &amdgpu_dpm_is_baco_supported, .pre_asic_init = &soc21_pre_asic_init, .query_video_codecs = &soc21_query_video_codecs, + .update_umd_stable_pstate = &soc21_update_umd_stable_pstate, }; static int soc21_common_early_init(void *handle) @@ -530,8 +561,10 @@ static int soc21_common_early_init(void *handle) case IP_VERSION(11, 0, 0): adev->cg_flags = 
AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | +#if 0 AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS | +#endif AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_GFX_FGCG | @@ -555,8 +588,13 @@ static int soc21_common_early_init(void *handle) adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_VCN_MGCG | - AMD_CG_SUPPORT_JPEG_MGCG; + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_SD; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | @@ -566,8 +604,27 @@ static int soc21_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x10; break; case IP_VERSION(11, 0, 1): - adev->cg_flags = 0; - adev->pg_flags = 0; + adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; default: @@ -661,6 +718,8 @@ static int soc21_common_set_clockgating_state(void *handle, switch (adev->ip_versions[NBIO_HWIP][0]) { case IP_VERSION(4, 3, 0): + case IP_VERSION(4, 3, 1): + case IP_VERSION(7, 7, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 606892dbea1c..bf7524f16b66 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -119,6 +119,24 @@ static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *error_count += 1; umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + + if (ras->umc_ecc.record_ce_addr_supported) { + uint64_t err_addr, soc_pa; + uint32_t channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr; + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa); + } } } @@ -251,7 +269,9 @@ static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, - unsigned long *error_count) + unsigned long *error_count, + uint32_t ch_inst, + uint32_t umc_inst) { uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; uint32_t ecc_err_cnt, ecc_err_cnt_addr; @@ -295,6 +315,31 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, *error_count += 1; umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + + { + uint64_t err_addr, soc_pa; + uint32_t mc_umc_addrt0; 
+ uint32_t channel_index; + + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa); + } } } @@ -395,7 +440,8 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, ch_inst); umc_v6_7_query_correctable_error_count(adev, umc_reg_offset, - &(err_data->ce_count)); + &(err_data->ce_count), + ch_inst, umc_inst); umc_v6_7_querry_uncorrectable_error_count(adev, umc_reg_offset, &(err_data->ue_count)); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c new file mode 100644 index 000000000000..36a2053f2e8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -0,0 +1,357 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "umc_v8_10.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_10_0_offset.h" +#include "umc/umc_8_10_0_sh_mask.h" + +#define UMC_8_NODE_DIST 0x800000 +#define UMC_8_INST_DIST 0x4000 + +struct channelnum_map_colbit { + uint32_t channel_num; + uint32_t col_bit; +}; + +const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { + {24, 13}, + {20, 13}, + {16, 12}, + {14, 12}, + {12, 12}, + {10, 12}, + {6, 11}, +}; + +const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM] = { + {{16, 18}, {17, 19}}, + {{15, 11}, {3, 7}}, + {{1, 5}, {13, 9}}, + {{23, 21}, {22, 20}}, + {{0, 4}, {12, 8}}, + {{14, 10}, {2, 6}} + }; + +static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, + uint32_t node_inst, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst + + UMC_8_NODE_DIST * node_inst; +} + +static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_clear_error_count_per_channel(adev, + umc_reg_offset); + } +} + +static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + /* UMC 8_10 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - + UMC_V8_10_CE_CNT_INIT); + + /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Check the MCUMC_STATUS. 
*/ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + umc_v8_10_clear_error_count(adev); +} + +static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num) +{ + uint32_t t = 0; + + for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++) + if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num) + return umc_v8_10_channelnum_map_colbit_table[t].col_bit; + + /* Failed to get col_bit. */ + return U32_MAX; +} + +/* + * Mapping normal address to soc physical address in swizzle mode. + */ +static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev, + uint32_t channel_idx, + uint64_t na, uint64_t *soc_pa) +{ + uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + uint32_t col_bit = umc_v8_10_get_col_bit(channel_num); + uint64_t tmp_addr; + + if (col_bit == U32_MAX) + return -1; + + tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx); + *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_MID(na, col_bit) | + SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_LSB(na); + + return 0; +} + +static void umc_v8_10_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t node_inst, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + uint32_t channel_index; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + return; + } + + channel_index = + adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * + adev->umc.channel_inst_num + + umc_inst * adev->umc.channel_inst_num + + ch_inst]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + uint32_t addr_lsb; + uint64_t mc_umc_addrt0; + + mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) 
* 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* the lowest lsb bits should be ignored */ + addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); + + err_addr &= ~((0x1ULL << addr_lsb) - 1); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { + uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + uint64_t na_err_addr, retired_page_addr; + uint32_t col = 0; + int ret = 0; + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. */ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; + } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); + } + } + } + + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); +} + +static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_error_address(adev, + err_data, + umc_reg_offset, + node_inst, + ch_inst, + umc_inst); + } +} + +static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); + } +} + +const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { + .query_ras_error_count = umc_v8_10_query_ras_error_count, + .query_ras_error_address = umc_v8_10_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v8_10_ras = { + .ras_block = { + .hw_ops = &umc_v8_10_ras_hw_ops, + }, + .err_cnt_init = umc_v8_10_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h new file mode 100644 index 000000000000..849ede88e111 --- /dev/null +++ 
b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -0,0 +1,70 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V8_10_H__ +#define __UMC_V8_10_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_10_UMC_INSTANCE_NUM 2 + +/* Total channel instances for all umc nodes */ +#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num) + +/* UMC register per channel offset */ +#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 + +/* EccErrCnt max value */ +#define UMC_V8_10_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD) + +#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4 + +/* The C5 bit in NA address */ +#define UMC_V8_10_NA_C5_BIT 14 + +/* Map to swizzle mode address */ +#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \ + ((((na) >> 10) * (ch_num) + (ch_idx)) << 10) +#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \ + (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2)) +#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit)) +#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \ + ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8) +#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF) + +extern struct amdgpu_umc_ras umc_v8_10_ras; +extern const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM]; + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 9119e966ffff..fb2d74f30448 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -25,11 +25,11 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" -#include "vcn_sw_ring.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" @@ -45,15 +45,15 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 -bool unifiedQ_enabled = false; +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE
0x00000001 static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 }; -static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); -static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); @@ -71,36 +71,15 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (unifiedQ_enabled) { - adev->vcn.num_vcn_inst = 1; - adev->vcn.num_enc_rings = 1; - } else { - adev->vcn.num_enc_rings = 2; - } - - if (!unifiedQ_enabled) - vcn_v4_0_set_dec_ring_funcs(adev); + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; - vcn_v4_0_set_enc_ring_funcs(adev); + vcn_v4_0_set_unified_ring_funcs(adev); vcn_v4_0_set_irq_funcs(adev); return 0; } -static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) -{ - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - const struct common_firmware_header *hdr; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; - adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - DRM_INFO("PSP loading VCN firmware\n"); - } -} - /** * vcn_v4_0_sw_init - sw init for VCN block * @@ -111,17 +90,14 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) static int vcn_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; - int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r; r = amdgpu_vcn_sw_init(adev); if (r) return r; - if (unifiedQ_enabled) - amdgpu_vcn_setup_unified_queue_ucode(adev); - else - amdgpu_vcn_setup_ucode(adev); + amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); if (r) @@ -129,81 +105,40 @@ static int vcn_v4_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; + if (adev->vcn.harvest_config & (1 << i)) continue; - /* VCN DEC TRAP */ + + atomic_set(&adev->vcn.inst[i].sched_score, 0); + + /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq); + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; - atomic_set(&adev->vcn.inst[i].sched_score, 0); - if (!unifiedQ_enabled) { - ring = &adev->vcn.inst[i].ring_dec; - ring->use_doorbell = true; - - /* VCN4 doorbell layout - * 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg) - * 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue) - * 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue) - * 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved) - * 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue) - */ - - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) - + 5 + 8 * i; - - sprintf(ring->name, "vcn_dec_%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - AMDGPU_RING_PRIO_DEFAULT, - &adev->vcn.inst[i].sched_score); - if (r) - return r; - } - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - /* VCN ENC TRAP */ - r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); - if (r) - return r; - - ring = 
&adev->vcn.inst[i].ring_enc[j]; - ring->use_doorbell = true; - - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; - - if (unifiedQ_enabled) { - sprintf(ring->name, "vcn_unified%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - } else { - enum amdgpu_ring_priority_level hw_prio; + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - hw_prio = amdgpu_vcn_get_enc_ring_prio(j); - sprintf(ring->name, "vcn_enc_%d.%d", i, j); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, - hw_prio, &adev->vcn.inst[i].sched_score); - } - if (r) - return r; - } + sprintf(ring->name, "vcn_unified_%d", i); - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = 0; + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); + if (r) + return r; - if (unifiedQ_enabled) { - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = 1; - } + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } - if (!unifiedQ_enabled) { - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; + return 0; } @@ -219,20 +154,20 @@ static int vcn_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r, idx; - if (drm_dev_enter(&adev->ddev, &idx)) { - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + continue; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = 0; - fw_shared->sq.is_enabled = 0; - } + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } - drm_dev_exit(idx); - } + drm_dev_exit(idx); + } r = amdgpu_vcn_suspend(adev); if (r) @@ -254,15 +189,13 @@ static int vcn_v4_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j, r; + int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - if (unifiedQ_enabled) - ring = &adev->vcn.inst[i].ring_enc[0]; - else - ring = &adev->vcn.inst[i].ring_dec; + + ring = &adev->vcn.inst[i].ring_enc[0]; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); @@ -270,13 +203,6 @@ static int vcn_v4_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) goto done; - - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - ring = &adev->vcn.inst[i].ring_enc[j]; - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; - } } done: @@ -464,7 +390,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, 
regUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); - } if (!indirect) @@ -888,7 +813,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; - int i; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, @@ -974,74 +898,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); - if (unifiedQ_enabled) { - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - } else - ring = &adev->vcn.inst[inst_idx].ring_dec; + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL, - ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | - VCN_RB4_DB_CTRL__EN_MASK); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4, - upper_32_bits(ring->gpu_addr)); - - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); - - /* reseting ring, fw should not check RB ring */ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); - /* Initialize the ring buffer's read and write pointers */ - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB4_EN_MASK; + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0); - - if (unifiedQ_enabled) - fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET; - - for (i = 0; i < adev->vcn.num_enc_rings; i++) { - ring = &adev->vcn.inst[inst_idx].ring_enc[i]; - - if (i) { - ring = &adev->vcn.inst[inst_idx].ring_enc[1]; - - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4); - tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); - - WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL, - ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | - VCN_RB2_DB_CTRL__EN_MASK); - } else { - ring = &adev->vcn.inst[inst_idx].ring_enc[0]; - - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); - 
WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); - tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); - WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - } - } return 0; } @@ -1064,6 +946,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); continue; @@ -1081,15 +965,15 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* enable VCPU clock */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); /* disable master interrupt */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* enable LMI MC and UMC channels */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; @@ -1099,10 +983,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* setup regUVD_LMI_CTRL */ tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); /* setup regUVD_MPC_CNTL */ tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); @@ -1112,37 +996,37 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) /* setup UVD_MPC_SET_MUXA0 */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); /* setup UVD_MPC_SET_MUXB0 */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); /* setup UVD_MPC_SET_MUX */ WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); 
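/* From here the non-DPG start path continues as before, now serving the single unified queue: resume the memory-controller programming, release the VCPU from reset, poll UVD_STATUS until the firmware reports up, then enable the master interrupt and program the RB1 ring buffer. */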
vcn_v4_0_mc_resume(adev, i); /* VCN global tiling registers */ WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + adev->gfx.config.gb_addr_config); /* unblock VCPU register access */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); /* release VCPU reset to boot */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + ~UVD_VCPU_CNTL__BLK_RST_MASK); for (j = 0; j < 10; ++j) { uint32_t status; @@ -1157,6 +1041,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) } if (amdgpu_emu_mode==1) { + r = -1; if (status & 2) { r = 0; break; @@ -1166,13 +1051,13 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) if (status & 2) break; - dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); r = -1; @@ -1180,78 +1065,43 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) } if (r) { - dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i); + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); return r; } /* enable master interrupt */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* clear the busy bit of VCN_STATUS */ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - if (unifiedQ_enabled) { - ring = &adev->vcn.inst[i].ring_enc[0]; - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - } else { - ring = &adev->vcn.inst[i].ring_dec; - - WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL, - ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | - VCN_RB4_DB_CTRL__EN_MASK); - - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4, - upper_32_bits(ring->gpu_addr)); - - WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); - - /* resetting ring, fw should not check RB ring */ - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); - /* Initialize the ring buffer's read and write pointers */ - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB4_EN_MASK; - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + tmp = RREG32_SOC15(VCN, i, 
regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); - } - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - if (unifiedQ_enabled) - fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - else { - ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL, - ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | - VCN_RB2_DB_CTRL__EN_MASK); - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4); - } + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); } return 0; @@ -1265,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) * * Stop VCN block with dpg mode */ -static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { uint32_t tmp; @@ -1277,19 +1127,12 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF); - - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); - return 0; } /** @@ -1301,12 +1144,16 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) */ static int vcn_v4_0_stop(struct amdgpu_device *adev) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_stop_dpg_mode(adev, i); + vcn_v4_0_stop_dpg_mode(adev, i); continue; } @@ -1414,8 +1261,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, /* unpause dpg, no need to wait */ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } 
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } @@ -1424,165 +1269,199 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, } /** - * vcn_v4_0_dec_ring_get_rptr - get read pointer + * vcn_v4_0_unified_ring_get_rptr - get unified read pointer * * @ring: amdgpu_ring pointer * - * Returns the current hardware read pointer + * Returns the current hardware unified read pointer */ -static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4); + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); } /** - * vcn_v4_0_dec_ring_get_wptr - get write pointer + * vcn_v4_0_unified_ring_get_wptr - get unified write pointer * * @ring: amdgpu_ring pointer * - * Returns the current hardware write pointer + * Returns the current hardware unified write pointer */ -static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + if (ring->use_doorbell) return *ring->wptr_cpu_addr; else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4); + return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); } /** - * vcn_v4_0_dec_ring_set_wptr - set write pointer + * vcn_v4_0_unified_ring_set_wptr - set enc write pointer * * @ring: amdgpu_ring pointer * - * Commits the write pointer to the hardware + * Commits the enc write pointer to the hardware */ -static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2, - lower_32_bits(ring->wptr)); - } + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); } } -static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = { - .type = AMDGPU_RING_TYPE_VCN_DEC, - .align_mask = 0x3f, - .nop = VCN_DEC_SW_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB_0, - .get_rptr = vcn_v4_0_dec_ring_get_rptr, - .get_wptr = vcn_v4_0_dec_ring_get_wptr, - .set_wptr = vcn_v4_0_dec_ring_set_wptr, - .emit_frame_size = - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + - VCN_SW_RING_EMIT_FRAME_SIZE, - .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */ - .emit_ib = vcn_dec_sw_ring_emit_ib, - .emit_fence = vcn_dec_sw_ring_emit_fence, - .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush, - .test_ring = amdgpu_vcn_dec_sw_ring_test_ring, - .test_ib = amdgpu_vcn_dec_sw_ring_test_ib, - .insert_nop = amdgpu_ring_insert_nop, - .insert_end = vcn_dec_sw_ring_insert_end, - .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, - .emit_wreg = vcn_dec_sw_ring_emit_wreg, - .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait, 
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, -}; - -/** - * vcn_v4_0_enc_ring_get_rptr - get enc read pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware enc read pointer - */ -static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring) +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) { - struct amdgpu_device *adev = ring->adev; + struct drm_gpu_scheduler **scheds; - if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) - return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); - else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2); + /* The create msg must be in the first IB submitted */ + if (atomic_read(&p->entity->fence_seq)) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] + [AMDGPU_RING_PRIO_0].sched; + drm_sched_entity_modify_sched(p->entity, scheds, 1); + return 0; } -/** - * vcn_v4_0_enc_ring_get_wptr - get enc write pointer - * - * @ring: amdgpu_ring pointer - * - * Returns the current hardware enc write pointer - */ -static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring) +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) { - struct amdgpu_device *adev = ring->adev; + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void *ptr; + int r; - if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { - if (ring->use_doorbell) - return *ring->wptr_cpu_addr; - else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); - } else { - if (ring->use_doorbell) - return *ring->wptr_cpu_addr; - else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2); + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + return r; } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H246, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v4_0_limit_sched(p); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; } -/** - * vcn_v4_0_enc_ring_set_wptr - set enc write pointer - * - * @ring: amdgpu_ring pointer - * - * Commits the enc write pointer to the hardware - */ -static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring) +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job 
*job, + struct amdgpu_ib *ib) { - struct amdgpu_device *adev = ring->adev; + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + uint32_t val; + int r = 0; - if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { - if (ring->use_doorbell) { - *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); - } else { - WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); - } - } else { - if (ring->use_doorbell) { - *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); - } else { - WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - } + /* The first instance can decode anything */ + if (!ring->me) + return r; + + /* unified queue ib header has 8 double words. */ + if (ib->length_dw < 8) + return r; + + val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE + + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; + + if (decode_buffer->valid_buf_flag & 0x1) + r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo); } + return r; } -static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { +static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .vmhub = AMDGPU_MMHUB_0, - .get_rptr = vcn_v4_0_enc_ring_get_rptr, - .get_wptr = vcn_v4_0_enc_ring_get_wptr, - .set_wptr = vcn_v4_0_enc_ring_set_wptr, + .get_rptr = vcn_v4_0_unified_ring_get_rptr, + .get_wptr = vcn_v4_0_unified_ring_get_wptr, + .set_wptr = vcn_v4_0_unified_ring_set_wptr, + .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + @@ -1594,7 +1473,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { .emit_fence = vcn_v2_0_enc_ring_emit_fence, .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, - .test_ib = amdgpu_vcn_enc_ring_test_ib, + .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v2_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, @@ -1606,13 +1485,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { }; /** - * vcn_v4_0_set_dec_ring_funcs - set dec ring functions + * vcn_v4_0_set_unified_ring_funcs - set unified ring functions * * @adev: amdgpu_device pointer * - * Set decode ring functions + * Set unified ring functions */ -static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) +static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev) { int i; @@ -1620,32 +1499,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs; - adev->vcn.inst[i].ring_dec.me = i; - DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i); - } -} + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; -/** - * vcn_v4_0_set_enc_ring_funcs - set enc ring functions - * - * @adev: amdgpu_device pointer - * - * Set encode ring functions - */ -static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev) -{ - int i, j; - - for (i = 0; i < 
adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { - adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs; - adev->vcn.inst[i].ring_enc[j].me = i; - } - DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); + DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); } } @@ -1798,18 +1655,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__UVD_TRAP: - if (!unifiedQ_enabled) { - amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); - break; - } - break; case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; - case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY: - amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); - break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index cdd599a08125..03b7066471f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. */ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 3b4eb8285943..2022ffbb8dba 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. */ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; |
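The vega10_ih/vega20_ih hunks above extend the writeback path in get_wptr() to adev->irq.ih_soft and make set_rptr() return early for it, because the software IH ring is backed only by memory: there is no hardware rptr/wptr register or doorbell behind it. A minimal sketch of that pattern, using hypothetical soft_ih_* names and a toy struct rather than the real amdgpu_ih_ring plumbing:

/* Toy model of a memory-backed ("soft") interrupt ring: the producer
 * publishes a write pointer in ordinary memory, the consumer reads it
 * directly, and committing the read pointer touches no hardware because
 * this ring has nothing behind it to program. */
#include <stdint.h>
#include <stdbool.h>

struct soft_ih_ring {
	volatile uint32_t *wptr_cpu;	/* writeback copy updated by the producer */
	uint32_t rptr;			/* consumer position, memory only */
	bool is_soft;			/* true: no backing hardware registers */
};

static uint32_t soft_ih_get_wptr(struct soft_ih_ring *ih)
{
	/* No register fallback path for the soft ring: the writeback
	 * copy is authoritative, so just return it. */
	return *ih->wptr_cpu;
}

static void soft_ih_set_rptr(struct soft_ih_ring *ih)
{
	/* Nothing to write back for the soft ring; a hardware-backed
	 * ring would update its doorbell or rptr register here. */
	if (ih->is_soft)
		return;
}

In the patch itself the same split shows up as the early return added to vega10_ih_set_rptr()/vega20_ih_set_rptr() and the widened writeback check in the corresponding get_wptr() helpers.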