diff options
author | Dave Airlie <airlied@redhat.com> | 2022-07-27 09:33:44 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-07-27 09:33:45 +1000 |
commit | ee8b1ef9a6b089abf7a9c7d094b6e93fa05f15b9 (patch) | |
tree | 04d9ed5e31f325b40e4d8c6af8b9de8e3c6394e5 /drivers/gpu/drm/amd/amdgpu | |
parent | 417c1c1963549e9a48b83ada59d90258e38c6594 (diff) | |
parent | 1b54a0121dba12af268fb75c413feabdb9f573d4 (diff) |
Merge tag 'amd-drm-next-5.20-2022-07-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amdgpu:
- VCN4 fixes
- RAS support for UMC 8.10
- ACP support for jadeite platforms
- NBIO HDP flush fixes
- Misc spelling and grammar fixes
- Runtime PM fixes
- Non-DC HPD fix
- Clean up amdgpu DM code
- DSC fixes
- Expose some additional GFXOFF data via debugfs
- More FP clean up for new DCN blocks
- PPC DC FP fixes
- DCN 3.1.4 fixes
- DC DML stack usage fixes
- GMC fixes
- SPM fixes for RDNA2
amdkfd:
- MMU notifier fix
- Mutex fix
UAPI:
- Add a comment about VCN4 unified queues
- IP version information for UMDs
Proposed mesa change: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411/diffs?commit_id=c8a63590dfd0d64e6e6a634dcfed993f135dd075
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220726181536.5759-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
38 files changed, 1180 insertions, 354 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 74a8105fd2c0..7777d55275de 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -4,7 +4,7 @@ config DRM_AMDGPU_SI depends on DRM_AMDGPU help Choose this option if you want to enable experimental support - for SI asics. + for SI (Southern Islands) asics. SI is already supported in radeon. Experimental support for SI in amdgpu will be disabled by default and is still provided by @@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" depends on DRM_AMDGPU help - Choose this option if you want to enable support for CIK asics. + Choose this option if you want to enable support for CIK (Sea + Islands) asics. CIK is already supported in radeon. Support for CIK in amdgpu will be disabled by default and is still provided by radeon. diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index a87e42c2c8dc..c7d0cd15b5ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -93,7 +93,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2871a3e3801f..b075845a5328 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; +extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; @@ -1011,7 +1012,6 @@ struct amdgpu_device { uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; /* enable runtime pm on the device */ - bool runpm; bool in_runpm; bool has_pr3; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index cc9c9f8b23b2..bcc7ee02e0fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -29,6 +29,8 @@ #include <linux/platform_device.h> #include <sound/designware_i2s.h> #include <sound/pcm.h> +#include <linux/acpi.h> +#include <linux/dmi.h> #include "amdgpu.h" #include "atom.h" @@ -36,6 +38,7 @@ #include "acp_gfx_if.h" +#define ST_JADEITE 1 #define ACP_TILE_ON_MASK 0x03 #define ACP_TILE_OFF_MASK 0x02 #define ACP_TILE_ON_RETAIN_REG_MASK 0x1f @@ -85,6 +88,8 @@ #define ACP_DEVS 4 #define ACP_SRC_ID 162 +static unsigned long acp_machine_id; + enum { ACP_TILE_P1 = 0, ACP_TILE_P2, @@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to POWER GATE ACP block * smu will * 1. turn off the acp clock * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; } @@ -147,16 +150,14 @@ static int acp_poweron(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to UNGATE ACP block * smu will * 1. exit ulv * 2. turn on acp clock * 3. power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); return 0; } @@ -184,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data) return 0; } +static int acp_quirk_cb(const struct dmi_system_id *id) +{ + acp_machine_id = ST_JADEITE; + return 1; +} + +static const struct dmi_system_id acp_quirk_table[] = { + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"), + } + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"), + }, + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"), + }, + }, + {} +}; + /** * acp_hw_init - start and test ACP block * @@ -193,7 +225,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data) static int acp_hw_init(void *handle) { int r; - uint64_t acp_base; + u64 acp_base; u32 val = 0; u32 count = 0; struct i2s_platform_data *i2s_pdata = NULL; @@ -220,141 +252,210 @@ static int acp_hw_init(void *handle) return -EINVAL; acp_base = adev->rmmio_base; - - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) + if (!adev->acp.acp_genpd) return -ENOMEM; adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; adev->acp.acp_genpd->gpd.power_off = acp_poweroff; adev->acp.acp_genpd->gpd.power_on = acp_poweron; - - adev->acp.acp_genpd->adev = adev; pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + dmi_check_system(acp_quirk_table); + switch (acp_machine_id) { + case ST_JADEITE: + { + adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell), + GFP_KERNEL); + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), - GFP_KERNEL); - - if (adev->acp.acp_cell == NULL) { - r = -ENOMEM; - goto failure; - } - - adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); - if (adev->acp.acp_res == NULL) { - r = -ENOMEM; - goto failure; - } + adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } - i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); - if (i2s_pdata == NULL) { - r = -ENOMEM; - goto failure; - } + i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } - switch (adev->asic_type) { - case CHIP_STONEY: i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dma_irq"; + adev->acp.acp_res[2].flags = IORESOURCE_IRQ; + adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[2].end = adev->acp.acp_res[2].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 3; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2); + if (r) + goto failure; + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; break; - default: - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; } - i2s_pdata[0].cap = DWC_I2S_PLAY; - i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; - i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1 | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; default: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; - } + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), + GFP_KERNEL); - i2s_pdata[1].cap = DWC_I2S_RECORD; - i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; - i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; - default: - break; - } + adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } + + i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } + + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } + i2s_pdata[0].cap = DWC_I2S_PLAY; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } + + i2s_pdata[1].cap = DWC_I2S_RECORD; + i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } - i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; - i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; - i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; - - adev->acp.acp_res[0].name = "acp2x_dma"; - adev->acp.acp_res[0].flags = IORESOURCE_MEM; - adev->acp.acp_res[0].start = acp_base; - adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; - - adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; - adev->acp.acp_res[1].flags = IORESOURCE_MEM; - adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; - adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; - - adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; - adev->acp.acp_res[2].flags = IORESOURCE_MEM; - adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; - adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; - - adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; - adev->acp.acp_res[3].flags = IORESOURCE_MEM; - adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; - adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; - - adev->acp.acp_res[4].name = "acp2x_dma_irq"; - adev->acp.acp_res[4].flags = IORESOURCE_IRQ; - adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); - adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; - - adev->acp.acp_cell[0].name = "acp_audio_dma"; - adev->acp.acp_cell[0].num_resources = 5; - adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; - adev->acp.acp_cell[0].platform_data = &adev->asic_type; - adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); - - adev->acp.acp_cell[1].name = "designware-i2s"; - adev->acp.acp_cell[1].num_resources = 1; - adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; - adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; - adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[2].name = "designware-i2s"; - adev->acp.acp_cell[2].num_resources = 1; - adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; - adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; - adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[3].name = "designware-i2s"; - adev->acp.acp_cell[3].num_resources = 1; - adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; - adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; - adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); - - r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, - ACP_DEVS); - if (r) - goto failure; - - r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, - acp_genpd_add_device); - if (r) - goto failure; + i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; + i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; + + i2s_pdata[3].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[3].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; + adev->acp.acp_res[2].flags = IORESOURCE_MEM; + adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; + adev->acp.acp_res[3].flags = IORESOURCE_MEM; + adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; + adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; + + adev->acp.acp_res[4].name = "acp2x_dma_irq"; + adev->acp.acp_res[4].flags = IORESOURCE_IRQ; + adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 5; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[2].name = "designware-i2s"; + adev->acp.acp_cell[2].num_resources = 1; + adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; + adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; + adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[3].name = "designware-i2s"; + adev->acp.acp_cell[3].num_resources = 1; + adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; + adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; + adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); + + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); + if (r) + goto failure; + + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; + } /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -546,8 +647,7 @@ static const struct amd_ip_funcs acp_ip_funcs = { .set_powergating_state = acp_set_powergating_state, }; -const struct amdgpu_ip_block_version acp_ip_block = -{ +const struct amdgpu_ip_block_version acp_ip_block = { .type = AMD_IP_BLOCK_TYPE_ACP, .major = 2, .minor = 2, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 581c7ae41102..08997092e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -115,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) - -static size_t amdgpu_amdkfd_acc_size(uint64_t size) -{ - size >>= PAGE_SHIFT; - size *= sizeof(dma_addr_t) + sizeof(void *); - return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) + - __roundup_pow_of_two(sizeof(struct ttm_tt)) + - PAGE_ALIGN(size); -} +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) /** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size - * of buffer including any reserved for control structures + * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. This should be @@ -143,19 +134,16 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); - size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; - acc_size = amdgpu_amdkfd_acc_size(size); - + system_mem_needed = 0; + ttm_mem_needed = 0; vram_needed = 0; if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size + size; + system_mem_needed = size; + ttm_mem_needed = size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - /* * Conservatively round up the allocation requirement to 2 MB * to avoid fragmentation caused by 4K allocations in the tail @@ -163,14 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, */ vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - system_mem_needed = acc_size + size; - ttm_mem_needed = acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - system_mem_needed = acc_size; - ttm_mem_needed = acc_size; - } else { + system_mem_needed = size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); return -ENOMEM; } @@ -208,28 +192,18 @@ release: static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag) { - size_t acc_size; - - acc_size = amdgpu_amdkfd_acc_size(size); - spin_lock(&kfd_mem_limit.mem_limit_lock); if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= (acc_size + size); + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { - kfd_mem_limit.system_mem_used -= (acc_size + size); - kfd_mem_limit.ttm_mem_used -= acc_size; - } else if (alloc_flag & - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | - KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) { - kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.ttm_mem_used -= acc_size; - } else { + kfd_mem_limit.system_mem_used -= size; + } else if (!(alloc_flag & + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | + KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } @@ -436,45 +410,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; case CHIP_ALDEBARAN: - if (coherent && uncached) { - if (adev->gmc.xgmi.connected_to_cpu || - !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (adev->asic_type == CHIP_ALDEBARAN && + adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; snoop = true; - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) + snoop = true; } pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6..2168163aad2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf..9caea1688fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. + */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e..d8f1335bc68f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ out_free_user_pages: kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 2ef5296216d6..8ee4e8491f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -272,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) return res; } -static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, - struct drm_file *filp, struct amdgpu_ctx *ctx) -{ - int r; - - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - - kref_init(&ctx->refcount); - ctx->mgr = mgr; - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; - - return 0; -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -326,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, return 0; } +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) +{ + u32 current_stable_pstate; + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + kref_init(&ctx->refcount); + ctx->mgr = mgr; + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + + r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); + if (r) + return r; + + ctx->stable_pstate = current_stable_pstate; + + return 0; +} + static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { @@ -397,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(&adev->ddev, &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); + amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f3b3c688e4e7..e2eec985adb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, } while (size) { - uint32_t value; + u32 value = adev->gfx.gfx_off_state; + + r = put_user(value, (u32 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; r = amdgpu_get_gfx_off_status(adev, &value); if (r) goto out; - r = put_user(value, (uint32_t *)buf); + r = put_user(value, (u32 *)buf); if (r) goto out; @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_status_read, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_wave_fops, &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, + &amdgpu_debugfs_gfxoff_status_fops, }; static const char *debugfs_regs_names[] = { @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = { "amdgpu_wave", "amdgpu_gpr", "amdgpu_gfxoff", + "amdgpu_gfxoff_status", }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e1c9587f659b..041bd906449d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5230,8 +5230,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && (job->hw_fence.ops != NULL) && - dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 37234c2998d7..242d1847c4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1716,6 +1716,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): @@ -2206,12 +2207,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - break; case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): @@ -2225,15 +2223,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3d139708160..429fcdf28836 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -80,7 +80,7 @@ * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. - * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. @@ -100,10 +100,11 @@ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface * - 3.46.0 - To enable hot plug amdgpu tests in libdrm - * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.48.0 - Add IP discovery version info to HW INFO */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 47 +#define KMS_DRIVER_MINOR 48 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -167,6 +168,7 @@ int amdgpu_smu_pptable_id = -1; */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; +uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp; int amdgpu_discovery = -1; @@ -827,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); +MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)"); +module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); + /** * DOC: abmlevel (uint) * Override the default ABM (Adaptive Backlight Management) level used for DC @@ -2121,7 +2126,7 @@ retry_init: if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ if (amdgpu_device_supports_px(ddev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); @@ -2178,7 +2183,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -2461,7 +2466,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret, i; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } @@ -2530,7 +2535,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret; - if (!adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) return -EINVAL; /* Avoids registers access if device is physically gone */ @@ -2574,7 +2579,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ int ret = 1; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ff659d4f772b..8adeb7469f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -47,7 +47,7 @@ * for GPU/CPU synchronization. When the fence is written, * it is expected that all buffers associated with that fence * are no longer in use by the associated ring on the GPU and - * that the the relevant GPU caches have been flushed. + * that the relevant GPU caches have been flushed. */ struct amdgpu_fence { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 36c1be77bf8f..5071b96be982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -133,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; - struct dma_fence *hw_fence; unsigned i; - if (job->hw_fence.ops == NULL) - hw_fence = job->external_hw_fence; - else - hw_fence = &job->hw_fence; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; + f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -156,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } void amdgpu_job_free(struct amdgpu_job *job) @@ -169,11 +159,7 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -203,15 +189,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, int r; job->base.sched = &ring->sched; - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - /* record external_hw_fence for direct submit */ - job->external_hw_fence = dma_fence_get(*fence); + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence); + if (r) return r; amdgpu_job_free(job); - dma_fence_put(*fence); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d599c0540b46..babc0af751c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -50,7 +50,6 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct dma_fence hw_fence; - struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 6de63ea6687e..1369c25448dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,17 +43,6 @@ #include "amdgpu_display.h" #include "amdgpu_ras.h" -static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev) -{ - /* - * Add below quirk on several sienna_cichlid cards to disable - * runtime pm to fix EMI failures. - */ - if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || - ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF))) - adev->runpm = false; -} - void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -158,37 +147,36 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm != 0)) { switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - /* enable runpm if runpm=1 */ + /* enable BACO as runpm mode if runpm=1 */ if (amdgpu_runtime_pm > 0) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; case CHIP_VEGA10: - /* turn runpm on if noretry=0 */ + /* enable BACO as runpm mode if noretry=0 */ if (!adev->gmc.noretry) - adev->runpm = true; + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; default: - /* enable runpm on CI+ */ - adev->runpm = true; + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; break; } - amdgpu_runtime_pm_quirk(adev); - - if (adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) dev_info(adev->dev, "Using BACO for runtime pm\n"); } @@ -473,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, result->hw_ip_version_major = adev->ip_blocks[i].version->major; result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; + + if (adev->asic_type >= CHIP_VEGA10) { + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_SDMA: + result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_JPEG: + result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_VCE: + result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; + break; + default: + result->ip_discovery_version = 0; + break; + } + } else { + result->ip_discovery_version = 0; + } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; result->ib_start_alignment = ib_start_alignment; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e9411c28d88b..3ee363bfbac2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2168,6 +2168,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_RLC_DRAM: *type = GFX_FW_TYPE_RLC_DRAM_BOOT; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + *type = GFX_FW_TYPE_SE0_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + *type = GFX_FW_TYPE_SE1_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + *type = GFX_FW_TYPE_SE2_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + *type = GFX_FW_TYPE_SE3_TAP_DELAYS; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; @@ -2348,6 +2363,13 @@ static int psp_load_smu_fw(struct psp_context *psp) &adev->firmware.ucode[AMDGPU_UCODE_ID_SMC]; struct amdgpu_ras *ras = psp->ras_context.ras; + /* + * Skip SMU FW reloading in case of using BACO for runpm only, + * as SMU is always alive. + */ + if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + return 0; + if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index e431f4994931..180634616b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -69,8 +69,8 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_SOSDRV = 0x20000, PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, - PSP_BL__LOAD_INTFDRV = 0xC0000, - PSP_BL__LOAD_DBGDRV = 0xD0000, + PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_INTFDRV = 0xD0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index f6fd9e1a7dac..03ac36b2c2cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -222,6 +222,11 @@ struct amdgpu_rlc { u32 rlc_dram_ucode_size_bytes; u32 rlcp_ucode_size_bytes; u32 rlcv_ucode_size_bytes; + u32 global_tap_delays_ucode_size_bytes; + u32 se0_tap_delays_ucode_size_bytes; + u32 se1_tap_delays_ucode_size_bytes; + u32 se2_tap_delays_ucode_size_bytes; + u32 se3_tap_delays_ucode_size_bytes; u32 *register_list_format; u32 *register_restore; @@ -232,6 +237,11 @@ struct amdgpu_rlc { u8 *rlc_dram_ucode; u8 *rlcp_ucode; u8 *rlcv_ucode; + u8 *global_tap_delays_ucode; + u8 *se0_tap_delays_ucode; + u8 *se1_tap_delays_ucode; + u8 *se2_tap_delays_ucode; + u8 *se3_tap_delays_ucode; bool is_rlc_v2_1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index c312577df596..939c8614f0e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -561,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "RLC_P"; case AMDGPU_UCODE_ID_RLC_V: return "RLC_V"; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + return "GLOBAL_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + return "SE0_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + return "SE1_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + return "SE2_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + return "SE3_TAP_DELAYS"; case AMDGPU_UCODE_ID_IMU_I: return "IMU_I"; case AMDGPU_UCODE_ID_IMU_D: @@ -745,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlcv_ucode; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.global_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode; + break; case AMDGPU_UCODE_ID_CP_MES: ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index f510b6aa82ab..ebed3f5226db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 { uint32_t rlcv_ucode_offset_bytes; }; +/* version_major=2, version_minor=4 */ +struct rlc_firmware_header_v2_4 { + struct rlc_firmware_header_v2_3 v2_3; + uint32_t global_tap_delays_ucode_size_bytes; + uint32_t global_tap_delays_ucode_offset_bytes; + uint32_t se0_tap_delays_ucode_size_bytes; + uint32_t se0_tap_delays_ucode_offset_bytes; + uint32_t se1_tap_delays_ucode_size_bytes; + uint32_t se1_tap_delays_ucode_offset_bytes; + uint32_t se2_tap_delays_ucode_size_bytes; + uint32_t se2_tap_delays_ucode_offset_bytes; + uint32_t se3_tap_delays_ucode_size_bytes; + uint32_t se3_tap_delays_ucode_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MES1_DATA, AMDGPU_UCODE_ID_IMU_I, AMDGPU_UCODE_ID_IMU_D, + AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS, + AMDGPU_UCODE_ID_SE0_TAP_DELAYS, + AMDGPU_UCODE_ID_SE1_TAP_DELAYS, + AMDGPU_UCODE_ID_SE2_TAP_DELAYS, + AMDGPU_UCODE_ID_SE3_TAP_DELAYS, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 2ec6698aa1fe..3629d8f292ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -41,6 +41,12 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) +#define LOOP_UMC_NODE_INST(node_inst) \ + for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++) + +#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ + LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -62,6 +68,10 @@ struct amdgpu_umc { uint32_t channel_inst_num; /* number of umc instance with memory map register access */ uint32_t umc_inst_num; + + /*number of umc node instance with memory map register access*/ + uint32_t node_inst_num; + /* UMC regiser per channel offset */ uint32_t channel_offs; /* channel index table of interleaved memory */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3caf6f386042..77f5e998a120 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 7c75df5bffed..802e5c753271 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5820c3f0e215..fafbad3cf08d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3976,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); } +static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); +} + static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4153,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (version_major == 2) { if (version_minor >= 1) gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor == 2) + if (version_minor >= 2) gfx_v10_0_init_rlc_iram_dram_microcode(adev); + if (version_minor == 4) { + gfx_v10_0_init_tap_delays_microcode(adev); + } } } @@ -4251,8 +4271,39 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); } + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5349ca4d19e3..c6e0f9313a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - - PACKET3_SET_UCONFIG_REG_START); + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 1772f006c61a..9ae8cdaa033e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -22,6 +22,9 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v10_0.h" @@ -980,6 +983,8 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v10_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index edbdc0b934ea..1471bfb9ae38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -22,10 +22,13 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" -#include "umc_v8_7.h" +#include "umc_v8_10.h" #include "athub/athub_3_0_0_sh_mask.h" #include "athub/athub_3_0_0_offset.h" #include "oss/osssys_6_0_0_offset.h" @@ -537,11 +540,36 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[UMC_HWIP][0]) { case IP_VERSION(8, 10, 0): + adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; + adev->umc.node_inst_num = adev->gmc.num_umc; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; + adev->umc.ras = &umc_v8_10_ras; + break; case IP_VERSION(8, 11, 0): break; default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not define special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + } } @@ -750,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v11_0_mc_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 34c610b9157d..b465baa26762 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { - .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 6074dd3a1ed8..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4531761dcf77..11848d1e238b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { - .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index 7490022d79d4..f27c41728822 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern struct amdgpu_nbio_ras nbio_v7_4_ras; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 236b7a61443a..22c775f39119 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -259,6 +259,8 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_CAP = 62, /* CAP_FW */ + GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */ + GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 765c3543ad18..00e9b7089feb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c new file mode 100644 index 000000000000..36a2053f2e8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -0,0 +1,357 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v8_10.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_10_0_offset.h" +#include "umc/umc_8_10_0_sh_mask.h" + +#define UMC_8_NODE_DIST 0x800000 +#define UMC_8_INST_DIST 0x4000 + +struct channelnum_map_colbit { + uint32_t channel_num; + uint32_t col_bit; +}; + +const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { + {24, 13}, + {20, 13}, + {16, 12}, + {14, 12}, + {12, 12}, + {10, 12}, + {6, 11}, +}; + +const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM] = { + {{16, 18}, {17, 19}}, + {{15, 11}, {3, 7}}, + {{1, 5}, {13, 9}}, + {{23, 21}, {22, 20}}, + {{0, 4}, {12, 8}}, + {{14, 10}, {2, 6}} + }; + +static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, + uint32_t node_inst, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst + + UMC_8_NODE_DIST * node_inst; +} + +static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_clear_error_count_per_channel(adev, + umc_reg_offset); + } +} + +static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + /* UMC 8_10 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - + UMC_V8_10_CE_CNT_INIT); + + /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Check the MCUMC_STATUS. */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + umc_v8_10_clear_error_count(adev); +} + +static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num) +{ + uint32_t t = 0; + + for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++) + if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num) + return umc_v8_10_channelnum_map_colbit_table[t].col_bit; + + /* Failed to get col_bit. */ + return U32_MAX; +} + +/* + * Mapping normal address to soc physical address in swizzle mode. + */ +static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev, + uint32_t channel_idx, + uint64_t na, uint64_t *soc_pa) +{ + uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + uint32_t col_bit = umc_v8_10_get_col_bit(channel_num); + uint64_t tmp_addr; + + if (col_bit == U32_MAX) + return -1; + + tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx); + *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_MID(na, col_bit) | + SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_LSB(na); + + return 0; +} + +static void umc_v8_10_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t node_inst, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + uint32_t channel_index; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + return; + } + + channel_index = + adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * + adev->umc.channel_inst_num + + umc_inst * adev->umc.channel_inst_num + + ch_inst]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + uint32_t addr_lsb; + uint64_t mc_umc_addrt0; + + mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* the lowest lsb bits should be ignored */ + addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); + + err_addr &= ~((0x1ULL << addr_lsb) - 1); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { + uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + uint64_t na_err_addr, retired_page_addr; + uint32_t col = 0; + int ret = 0; + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. */ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; + } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); + } + } + } + + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); +} + +static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_error_address(adev, + err_data, + umc_reg_offset, + node_inst, + ch_inst, + umc_inst); + } +} + +static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); + } +} + +const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { + .query_ras_error_count = umc_v8_10_query_ras_error_count, + .query_ras_error_address = umc_v8_10_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v8_10_ras = { + .ras_block = { + .hw_ops = &umc_v8_10_ras_hw_ops, + }, + .err_cnt_init = umc_v8_10_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h new file mode 100644 index 000000000000..849ede88e111 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -0,0 +1,70 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V8_10_H__ +#define __UMC_V8_10_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_10_UMC_INSTANCE_NUM 2 + +/* Total channel instances for all umc nodes */ +#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num) + +/* UMC regiser per channel offset */ +#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 + +/* EccErrCnt max value */ +#define UMC_V8_10_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD) + +#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4 + +/* The C5 bit in NA address */ +#define UMC_V8_10_NA_C5_BIT 14 + +/* Map to swizzle mode address */ +#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \ + ((((na) >> 10) * (ch_num) + (ch_idx)) << 10) +#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \ + (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2)) +#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit)) +#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \ + ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8) +#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF) + +extern struct amdgpu_umc_ras umc_v8_10_ras; +extern const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM]; + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 84ac2401895a..a91ffbf902d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_hw_ip.h" @@ -44,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) +{ + struct drm_gpu_scheduler **scheds; + + /* The create msg must be in the first IB submitted */ + if (atomic_read(&p->entity->fence_seq)) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] + [AMDGPU_RING_PRIO_0].sched; + drm_sched_entity_modify_sched(p->entity, scheds, 1); + return 0; +} + +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void *ptr; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H246, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v4_0_limit_sched(p); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; +} + +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); + struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + uint32_t val; + int r = 0; + + /* The first instance can decode anything */ + if (!ring->me) + return r; + + /* unified queue ib header has 8 double words. */ + if (ib->length_dw < 8) + return r; + + val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE + + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; + + if (decode_buffer->valid_buf_flag & 0x1) + r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo); + } + return r; +} + static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, @@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, + .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + |