Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Kconfig | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/aldebaran.c | 45
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 346
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 603
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 67
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 293
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 76
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 31
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 57
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 63
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 60
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 223
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 67
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 52
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 34
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 110
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 67
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/athub_v3_0.c | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h | 307
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 184
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 342
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c | 32
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 39
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 150
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 119
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 199
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 591
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c | 126
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c | 107
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 165
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c | 387
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 79
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 131
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc21.c | 73
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 357
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/umc_v8_10.h | 70
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 672
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 7
100 files changed, 5588 insertions, 1728 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 74a8105fd2c0..7777d55275de 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -4,7 +4,7 @@ config DRM_AMDGPU_SI
depends on DRM_AMDGPU
help
Choose this option if you want to enable experimental support
- for SI asics.
+ for SI (Southern Islands) asics.
SI is already supported in radeon. Experimental support for SI
in amdgpu will be disabled by default and is still provided by
@@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK
bool "Enable amdgpu support for CIK parts"
depends on DRM_AMDGPU
help
- Choose this option if you want to enable support for CIK asics.
+ Choose this option if you want to enable support for CIK (Sea
+ Islands) asics.
CIK is already supported in radeon. Support for CIK in amdgpu
will be disabled by default and is still provided by radeon.
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 3e0e2eb7e235..5a283d12f8e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -88,11 +88,12 @@ amdgpu-y += \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
- mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o
+ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
+ mmhub_v3_0_1.o
# add UMC block
amdgpu-y += \
- umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o
+ umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o
# add IH block
amdgpu-y += \
@@ -114,7 +115,8 @@ amdgpu-y += \
psp_v11_0.o \
psp_v11_0_8.o \
psp_v12_0.o \
- psp_v13_0.o
+ psp_v13_0.o \
+ psp_v13_0_4.o
# add DCE block
amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
index c6cc493a5486..2b97b8a96fb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c
@@ -148,30 +148,22 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
+ struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
- struct list_head reset_device_list;
int r = 0;
dev_dbg(adev->dev, "aldebaran perform hw reset\n");
+
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
/* Wrong context, return error */
return -EINVAL;
}
- INIT_LIST_HEAD(&reset_device_list);
- if (reset_context->hive) {
- list_for_each_entry (tmp_adev,
- &reset_context->hive->device_list,
- gmc.xgmi.head)
- list_add_tail(&tmp_adev->reset_list,
- &reset_device_list);
- } else {
- list_add_tail(&reset_context->reset_req_dev->reset_list,
- &reset_device_list);
- }
-
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_lock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2;
}
@@ -179,7 +171,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
* Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch
* them together so that they can be completed asynchronously on multiple nodes
*/
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
/* For XGMI run all resets in parallel to speed up the process */
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
if (!queue_work(system_unbound_wq,
@@ -197,7 +189,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
/* For XGMI wait for all resets to complete before proceed */
if (!r) {
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
flush_work(&tmp_adev->reset_cntl->reset_work);
r = tmp_adev->asic_reset_res;
@@ -207,7 +199,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
}
}
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
mutex_unlock(&tmp_adev->reset_cntl->reset_lock);
tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE;
}
@@ -339,10 +331,13 @@ static int
aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
struct amdgpu_reset_context *reset_context)
{
+ struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
- struct list_head reset_device_list;
int r;
+ if (reset_device_list == NULL)
+ return -EINVAL;
+
if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] ==
IP_VERSION(13, 0, 2) &&
reset_context->hive == NULL) {
@@ -350,19 +345,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
return -EINVAL;
}
- INIT_LIST_HEAD(&reset_device_list);
- if (reset_context->hive) {
- list_for_each_entry (tmp_adev,
- &reset_context->hive->device_list,
- gmc.xgmi.head)
- list_add_tail(&tmp_adev->reset_list,
- &reset_device_list);
- } else {
- list_add_tail(&reset_context->reset_req_dev->reset_list,
- &reset_device_list);
- }
-
- list_for_each_entry (tmp_adev, &reset_device_list, reset_list) {
+ list_for_each_entry(tmp_adev, reset_device_list, reset_list) {
dev_info(tmp_adev->dev,
"GPU reset succeeded, trying to resume\n");
r = aldebaran_mode2_restore_ip(tmp_adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 30ce6bb6fa77..d597e2656c47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
extern uint amdgpu_dc_debug_mask;
+extern uint amdgpu_dc_visual_confirm;
extern uint amdgpu_dm_abm_level;
extern int amdgpu_backlight;
extern struct amdgpu_mgpu_info mgpu_info;
@@ -223,6 +224,9 @@ static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
static const bool __maybe_unused debug_evictions; /* = false */
static const bool __maybe_unused no_system_mem_limit;
#endif
+#ifdef CONFIG_HSA_AMD_P2P
+extern bool pcie_p2p;
+#endif
extern int amdgpu_tmz;
extern int amdgpu_reset_method;
@@ -274,7 +278,7 @@ extern int amdgpu_vcnfw_log;
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
-/* smasrt shift bias level limits */
+/* smart shift bias level limits */
#define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
#define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
@@ -313,7 +317,7 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
-
+#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
@@ -667,6 +671,7 @@ enum amd_hw_ip_block_type {
RSMU_HWIP,
XGMI_HWIP,
DCI_HWIP,
+ PCIE_HWIP,
MAX_HWIP
};
@@ -1007,7 +1012,6 @@ struct amdgpu_device {
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
/* enable runtime pm on the device */
- bool runpm;
bool in_runpm;
bool has_pr3;
@@ -1016,7 +1020,7 @@ struct amdgpu_device {
bool psp_sysfs_en;
/* Chip product information */
- char product_number[16];
+ char product_number[20];
char product_name[AMDGPU_PRODUCT_NAME_LEN];
char serial[20];
@@ -1044,10 +1048,18 @@ struct amdgpu_device {
/* reset dump register */
uint32_t *reset_dump_reg_list;
+ uint32_t *reset_dump_reg_value;
int num_regs;
+#ifdef CONFIG_DEV_COREDUMP
+ struct amdgpu_task_info reset_task_info;
+ bool reset_vram_lost;
+ struct timespec64 reset_time;
+#endif
bool scpm_enabled;
uint32_t scpm_status;
+
+ struct work_struct reset_work;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1241,9 +1253,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job* job);
-int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
- struct amdgpu_job *job);
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index cc9c9f8b23b2..6d72355ac492 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -29,6 +29,8 @@
#include <linux/platform_device.h>
#include <sound/designware_i2s.h>
#include <sound/pcm.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
#include "amdgpu.h"
#include "atom.h"
@@ -36,6 +38,7 @@
#include "acp_gfx_if.h"
+#define ST_JADEITE 1
#define ACP_TILE_ON_MASK 0x03
#define ACP_TILE_OFF_MASK 0x02
#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
@@ -85,6 +88,8 @@
#define ACP_DEVS 4
#define ACP_SRC_ID 162
+static unsigned long acp_machine_id;
+
enum {
ACP_TILE_P1 = 0,
ACP_TILE_P2,
@@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
- if (apd != NULL) {
- adev = apd->adev;
+ adev = apd->adev;
/* call smu to POWER GATE ACP block
* smu will
* 1. turn off the acp clock
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
- }
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0;
}
@@ -147,16 +150,14 @@ static int acp_poweron(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
- if (apd != NULL) {
- adev = apd->adev;
+ adev = apd->adev;
/* call smu to UNGATE ACP block
* smu will
* 1. exit ulv
* 2. turn on acp clock
* 3. power on acp tiles
*/
- amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
- }
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0;
}
@@ -184,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data)
return 0;
}
+static int acp_quirk_cb(const struct dmi_system_id *id)
+{
+ acp_machine_id = ST_JADEITE;
+ return 1;
+}
+
+static const struct dmi_system_id acp_quirk_table[] = {
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"),
+ }
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"),
+ },
+ },
+ {
+ .callback = acp_quirk_cb,
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"),
+ },
+ },
+ {}
+};
+
/**
* acp_hw_init - start and test ACP block
*
@@ -193,7 +225,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data)
static int acp_hw_init(void *handle)
{
int r;
- uint64_t acp_base;
+ u64 acp_base;
u32 val = 0;
u32 count = 0;
struct i2s_platform_data *i2s_pdata = NULL;
@@ -220,141 +252,202 @@ static int acp_hw_init(void *handle)
return -EINVAL;
acp_base = adev->rmmio_base;
-
-
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
- if (adev->acp.acp_genpd == NULL)
+ if (!adev->acp.acp_genpd)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
-
-
adev->acp.acp_genpd->adev = adev;
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+ dmi_check_system(acp_quirk_table);
+ switch (acp_machine_id) {
+ case ST_JADEITE:
+ {
+ adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell),
+ GFP_KERNEL);
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
- adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
- GFP_KERNEL);
-
- if (adev->acp.acp_cell == NULL) {
- r = -ENOMEM;
- goto failure;
- }
-
- adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
- if (adev->acp.acp_res == NULL) {
- r = -ENOMEM;
- goto failure;
- }
+ adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
- i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
- if (i2s_pdata == NULL) {
- r = -ENOMEM;
- goto failure;
- }
+ i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
- switch (adev->asic_type) {
- case CHIP_STONEY:
i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dma_irq";
+ adev->acp.acp_res[2].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].num_resources = 3;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+ r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2);
+ if (r)
+ goto failure;
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
break;
- default:
- i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
}
- i2s_pdata[0].cap = DWC_I2S_PLAY;
- i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
- i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
- switch (adev->asic_type) {
- case CHIP_STONEY:
- i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_COMP_PARAM1 |
- DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
- break;
default:
- i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
- DW_I2S_QUIRK_COMP_PARAM1;
- }
+ adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
+ GFP_KERNEL);
- i2s_pdata[1].cap = DWC_I2S_RECORD;
- i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
- i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+ if (!adev->acp.acp_cell) {
+ r = -ENOMEM;
+ goto failure;
+ }
- i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
- switch (adev->asic_type) {
- case CHIP_STONEY:
- i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
- break;
- default:
- break;
- }
+ adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
+ if (!adev->acp.acp_res) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ if (!i2s_pdata) {
+ r = -ENOMEM;
+ goto failure;
+ }
+
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ }
+ i2s_pdata[0].cap = DWC_I2S_PLAY;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1 |
+ DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1;
+ }
+
+ i2s_pdata[1].cap = DWC_I2S_RECORD;
+ i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ break;
+ }
- i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
- i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
- i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
- i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
-
- adev->acp.acp_res[0].name = "acp2x_dma";
- adev->acp.acp_res[0].flags = IORESOURCE_MEM;
- adev->acp.acp_res[0].start = acp_base;
- adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
-
- adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
- adev->acp.acp_res[1].flags = IORESOURCE_MEM;
- adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
- adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
-
- adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
- adev->acp.acp_res[2].flags = IORESOURCE_MEM;
- adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
- adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
-
- adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
- adev->acp.acp_res[3].flags = IORESOURCE_MEM;
- adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
- adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
-
- adev->acp.acp_res[4].name = "acp2x_dma_irq";
- adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
- adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
- adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
-
- adev->acp.acp_cell[0].name = "acp_audio_dma";
- adev->acp.acp_cell[0].num_resources = 5;
- adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
- adev->acp.acp_cell[0].platform_data = &adev->asic_type;
- adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
-
- adev->acp.acp_cell[1].name = "designware-i2s";
- adev->acp.acp_cell[1].num_resources = 1;
- adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
- adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
- adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
-
- adev->acp.acp_cell[2].name = "designware-i2s";
- adev->acp.acp_cell[2].num_resources = 1;
- adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
- adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
- adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
-
- adev->acp.acp_cell[3].name = "designware-i2s";
- adev->acp.acp_cell[3].num_resources = 1;
- adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
- adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
- adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
-
- r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
- ACP_DEVS);
- if (r)
- goto failure;
-
- r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
- acp_genpd_add_device);
- if (r)
- goto failure;
+ i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
+ i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+ adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
+ adev->acp.acp_res[3].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
+ adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
+
+ adev->acp.acp_res[4].name = "acp2x_dma_irq";
+ adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].num_resources = 5;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+ adev->acp.acp_cell[0].platform_data = &adev->asic_type;
+ adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].num_resources = 1;
+ adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+ adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+ adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].num_resources = 1;
+ adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
+ adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
+ adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
+
+ r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS);
+ if (r)
+ goto failure;
+
+ r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
+ acp_genpd_add_device);
+ if (r)
+ goto failure;
+ }
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@@ -546,8 +639,7 @@ static const struct amd_ip_funcs acp_ip_funcs = {
.set_powergating_state = acp_set_powergating_state,
};
-const struct amdgpu_ip_block_version acp_ip_block =
-{
+const struct amdgpu_ip_block_version acp_ip_block = {
.type = AMD_IP_BLOCK_TYPE_ACP,
.major = 2,
.minor = 2,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 98ac53ee6bb5..130060834b4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -66,9 +66,7 @@ struct amdgpu_atif {
struct amdgpu_atif_notifications notifications;
struct amdgpu_atif_functions functions;
struct amdgpu_atif_notification_cfg notification_cfg;
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
struct backlight_device *bd;
-#endif
struct amdgpu_dm_backlight_caps backlight_caps;
};
@@ -436,7 +434,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->bd) {
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
@@ -447,7 +444,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
*/
backlight_device_set_brightness(atif->bd, req.backlight_level);
}
-#endif
}
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
@@ -849,7 +845,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
{
struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
if (atif->notifications.brightness_change) {
if (amdgpu_device_has_dc_support(adev)) {
#if defined(CONFIG_DRM_AMD_DC)
@@ -876,7 +871,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
}
}
}
-#endif
adev->acpi_nb.notifier_call = amdgpu_acpi_event;
register_acpi_notifier(&adev->acpi_nb);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3b1c675aba34..5e53a5293935 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -33,6 +33,7 @@
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
/* Total memory size in system memory and all GPU VRAM. Used to
* estimate worst case amount of memory to reserve for page tables
@@ -122,6 +123,22 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
}
}
+
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ kfd.reset_work);
+
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
@@ -180,6 +197,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
adev_to_drm(adev), &gpu_resources);
+
+ INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
}
}
@@ -247,7 +266,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
{
if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover(adev, NULL);
+ amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->kfd.reset_work);
}
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
@@ -671,6 +691,8 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
goto err_ib_sched;
}
+ /* Drop the initial kref_init count (see drm_sched_main as example) */
+ dma_fence_put(f);
ret = dma_fence_wait(f, false);
err_ib_sched:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f8b9f27adcf5..647220a8762d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -48,6 +48,7 @@ enum kfd_mem_attachment_type {
KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
KFD_MEM_ATT_USERPTR, /* SG bo to DMA map pages from a userptr bo */
KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */
+ KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */
};
struct kfd_mem_attachment {
@@ -95,7 +96,9 @@ struct amdgpu_amdkfd_fence {
struct amdgpu_kfd_dev {
struct kfd_dev *dev;
uint64_t vram_used;
+ uint64_t vram_used_aligned;
bool init_complete;
+ struct work_struct reset_work;
};
enum kgd_engine_type {
@@ -170,6 +173,9 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
struct svm_range_bo *svm_bo);
+#if defined(CONFIG_DEBUG_FS)
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
+#endif
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
@@ -266,6 +272,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
@@ -279,10 +286,11 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
- struct kgd_mem *mem, void **kptr, uint64_t *size);
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
- struct kgd_mem *mem);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size);
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);
+
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo);
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
@@ -301,6 +309,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 alloc_flag);
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
@@ -332,7 +344,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
}
#endif
/* KGD2KFD callbacks */
-int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 1d0dbff87d3f..469785d33791 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -159,11 +159,14 @@ static void amdkfd_fence_release(struct dma_fence *f)
}
/**
- * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
- * if same return TRUE else return FALSE.
+ * amdkfd_fence_check_mm - Check whether to prevent eviction of @f by @mm
*
* @f: [IN] fence
* @mm: [IN] mm that needs to be verified
+ *
+ * Check if @mm is same as that of the fence @f, if same return TRUE else
+ * return FALSE.
+ * For svm bo, which supports vram overcommitment, always return FALSE.
*/
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
@@ -171,7 +174,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
if (!fence)
return false;
- else if (fence->mm == mm)
+ else if (fence->mm == mm && !fence->svm_bo)
return true;
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6b6d46e29e6e..cbd593f7d553 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -32,12 +32,19 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
+#include "kfd_smi_events.h"
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
*/
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+/*
+ * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
+ * BO chunk
+ */
+#define VRAM_AVAILABLITY_ALIGN (1 << 21)
+
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
@@ -108,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
* compromise that should work in most cases without reserving too
* much memory for page tables unnecessarily (factor 16K, >> 14).
*/
-#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
-static size_t amdgpu_amdkfd_acc_size(uint64_t size)
-{
- size >>= PAGE_SHIFT;
- size *= sizeof(dma_addr_t) + sizeof(void *);
-
- return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) +
- __roundup_pow_of_two(sizeof(struct ttm_tt)) +
- PAGE_ALIGN(size);
-}
+#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
/**
* amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
- * of buffer including any reserved for control structures
+ * of buffer.
*
* @adev: Device to which allocated BO belongs to
* @size: Size of buffer, in bytes, encapsulated by B0. This should be
@@ -131,33 +129,32 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
*
* Return: returns -ENOMEM in case of error, ZERO otherwise
*/
-static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
- size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
+ size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
- acc_size = amdgpu_amdkfd_acc_size(size);
-
+ system_mem_needed = 0;
+ ttm_mem_needed = 0;
vram_needed = 0;
if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
- system_mem_needed = acc_size + size;
- ttm_mem_needed = acc_size + size;
+ system_mem_needed = size;
+ ttm_mem_needed = size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- system_mem_needed = acc_size;
- ttm_mem_needed = acc_size;
+ /*
+ * Conservatively round up the allocation requirement to 2 MB
+ * to avoid fragmentation caused by 4K allocations in the tail
+ * 2M BO chunk.
+ */
vram_needed = size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
- system_mem_needed = acc_size + size;
- ttm_mem_needed = acc_size;
- } else if (alloc_flag &
- (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
- KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
- system_mem_needed = acc_size;
- ttm_mem_needed = acc_size;
- } else {
+ system_mem_needed = size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
return -ENOMEM;
}
@@ -172,8 +169,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
- (adev->kfd.vram_used + vram_needed >
- adev->gmc.real_vram_size - reserved_for_pt)) {
+ (adev && adev->kfd.vram_used + vram_needed >
+ adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size) -
+ reserved_for_pt)) {
ret = -ENOMEM;
goto release;
}
@@ -181,7 +180,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
/* Update memory accounting by decreasing available system
* memory, TTM memory and GPU memory as computed above
*/
- adev->kfd.vram_used += vram_needed;
+ WARN_ONCE(vram_needed && !adev,
+ "adev reference can't be null when vram is used");
+ if (adev) {
+ adev->kfd.vram_used += vram_needed;
+ adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
+ }
kfd_mem_limit.system_mem_used += system_mem_needed;
kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
@@ -190,36 +194,30 @@ release:
return ret;
}
-static void unreserve_mem_limit(struct amdgpu_device *adev,
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
{
- size_t acc_size;
-
- acc_size = amdgpu_amdkfd_acc_size(size);
-
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
- kfd_mem_limit.system_mem_used -= (acc_size + size);
- kfd_mem_limit.ttm_mem_used -= (acc_size + size);
+ kfd_mem_limit.system_mem_used -= size;
+ kfd_mem_limit.ttm_mem_used -= size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- kfd_mem_limit.system_mem_used -= acc_size;
- kfd_mem_limit.ttm_mem_used -= acc_size;
- adev->kfd.vram_used -= size;
+ WARN_ONCE(!adev,
+ "adev reference can't be null when alloc mem flags vram is set");
+ if (adev) {
+ adev->kfd.vram_used -= size;
+ adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN);
+ }
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
- kfd_mem_limit.system_mem_used -= (acc_size + size);
- kfd_mem_limit.ttm_mem_used -= acc_size;
- } else if (alloc_flag &
- (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
- KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
- kfd_mem_limit.system_mem_used -= acc_size;
- kfd_mem_limit.ttm_mem_used -= acc_size;
- } else {
+ kfd_mem_limit.system_mem_used -= size;
+ } else if (!(alloc_flag &
+ (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+ KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
goto release;
}
-
- WARN_ONCE(adev->kfd.vram_used < 0,
+ WARN_ONCE(adev && adev->kfd.vram_used < 0,
"KFD VRAM memory accounting unbalanced");
WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
"KFD TTM memory accounting unbalanced");
@@ -236,11 +234,47 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
u32 alloc_flags = bo->kfd_bo->alloc_flags;
u64 size = amdgpu_bo_size(bo);
- unreserve_mem_limit(adev, size, alloc_flags);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
kfree(bo->kfd_bo);
}
+/**
+ * @create_dmamap_sg_bo: Creates an amdgpu_bo object to reflect information
+ * about USERPTR or DOORBELL or MMIO BO.
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ * in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+ struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+ struct drm_gem_object *gem_obj;
+ int ret, align;
+
+ ret = amdgpu_bo_reserve(mem->bo, false);
+ if (ret)
+ return ret;
+
+ align = 1;
+ ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
+ AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
+ ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+
+ amdgpu_bo_unreserve(mem->bo);
+
+ if (ret) {
+ pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret);
+ return -EINVAL;
+ }
+
+ *bo_out = gem_to_amdgpu_bo(gem_obj);
+ (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+ return ret;
+}
+
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
* reservation object.
*
@@ -350,22 +384,8 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret;
}
- ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
- if (ret) {
- pr_err("failed to validate PD\n");
- return ret;
- }
-
vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
- if (vm->use_cpu_for_update) {
- ret = amdgpu_bo_kmap(pd, NULL);
- if (ret) {
- pr_err("failed to kmap PD, ret=%d\n", ret);
- return ret;
- }
- }
-
return 0;
}
@@ -399,45 +419,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
switch (adev->asic_type) {
case CHIP_ARCTURUS:
- if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
- if (bo_adev == adev)
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- else
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- } else {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
- }
- break;
case CHIP_ALDEBARAN:
- if (coherent && uncached) {
- if (adev->gmc.xgmi.connected_to_cpu ||
- !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
- snoop = true;
- mapping_flags |= AMDGPU_VM_MTYPE_UC;
- } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
if (bo_adev == adev) {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
- if (adev->gmc.xgmi.connected_to_cpu)
+ if (uncached)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else if (coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_CC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_RW;
+ if (adev->asic_type == CHIP_ALDEBARAN &&
+ adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ if (uncached || coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
if (amdgpu_xgmi_same_hive(adev, bo_adev))
snoop = true;
}
} else {
+ if (uncached || coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
snoop = true;
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
default:
- mapping_flags |= coherent ?
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+ if (uncached || coherent)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+
+ if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
+ snoop = true;
}
pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
@@ -446,6 +463,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
return pte_flags;
}
+/**
+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+ if (!sg)
+ return NULL;
+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+ kfree(sg);
+ return NULL;
+ }
+ sg_dma_address(sg->sgl) = addr;
+ sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->sgl->dma_length = size;
+#endif
+ return sg;
+}
+
static int
kfd_mem_dmamap_userptr(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
@@ -510,6 +559,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
+/**
+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it instead
+ * loops back. The need to DMA map arises only when accessing peer device's DOORBELL
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without regard to
+ * device ownership. This is because access requests for MMIO go through PCIe root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ * - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ * in updating requesting device's page table
+ * - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ * accessible. This allows an update of requesting device's page table
+ * with entries associated with DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * - Mapping of DOORBELL or MMIO BO of same or peer device
+ * - Validating an evicted DOORBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+ dma_addr_t dma_addr;
+ bool mmio;
+ int ret;
+
+ /* Expect SG Table of dmapmap BO to be NULL */
+ mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+ if (unlikely(ttm->sg)) {
+ pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY NON-NULL", mmio);
+ return -EINVAL;
+ }
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+ pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+ pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+ dma_addr = dma_map_resource(adev->dev, dma_addr,
+ mem->bo->tbo.sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ ret = dma_mapping_error(adev->dev, dma_addr);
+ if (unlikely(ret))
+ return ret;
+ pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+ ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+ if (unlikely(!ttm->sg)) {
+ ret = -ENOMEM;
+ goto unmap_sg;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (unlikely(ret))
+ goto free_sg;
+
+ return ret;
+
+free_sg:
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+unmap_sg:
+ dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+ dir, DMA_ATTR_SKIP_CPU_SYNC);
+ return ret;
+}
+
static int
kfd_mem_dmamap_attachment(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
@@ -521,6 +651,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
return kfd_mem_dmamap_userptr(mem, attachment);
case KFD_MEM_ATT_DMABUF:
return kfd_mem_dmamap_dmabuf(attachment);
+ case KFD_MEM_ATT_SG:
+ return kfd_mem_dmamap_sg_bo(mem, attachment);
default:
WARN_ON_ONCE(1);
}
@@ -561,6 +693,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
+/**
+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs following steps:
+ * - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ * - Free SG Table that is used to encapsulate DMA mapped memory of
+ * peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ * UNMapping of DOORBELL or MMIO BO on a device having access to its memory
+ * Eviction of DOORBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+ struct kfd_mem_attachment *attachment)
+{
+ struct ttm_operation_ctx ctx = {.interruptible = true};
+ struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+ struct amdgpu_device *adev = attachment->adev;
+ struct ttm_tt *ttm = bo->tbo.ttm;
+ enum dma_data_direction dir;
+
+ if (unlikely(!ttm->sg)) {
+ pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+ return;
+ }
+
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+ dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+ ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ sg_free_table(ttm->sg);
+ kfree(ttm->sg);
+ ttm->sg = NULL;
+ bo->tbo.sg = NULL;
+}
+
static void
kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
struct kfd_mem_attachment *attachment)
@@ -574,39 +750,15 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
case KFD_MEM_ATT_DMABUF:
kfd_mem_dmaunmap_dmabuf(attachment);
break;
+ case KFD_MEM_ATT_SG:
+ kfd_mem_dmaunmap_sg_bo(mem, attachment);
+ break;
default:
WARN_ON_ONCE(1);
}
}
static int
-kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
- struct amdgpu_bo **bo)
-{
- unsigned long bo_size = mem->bo->tbo.base.size;
- struct drm_gem_object *gobj;
- int ret;
-
- ret = amdgpu_bo_reserve(mem->bo, false);
- if (ret)
- return ret;
-
- ret = amdgpu_gem_object_create(adev, bo_size, 1,
- AMDGPU_GEM_DOMAIN_CPU,
- AMDGPU_GEM_CREATE_PREEMPTIBLE,
- ttm_bo_type_sg, mem->bo->tbo.base.resv,
- &gobj);
- amdgpu_bo_unreserve(mem->bo);
- if (ret)
- return ret;
-
- *bo = gem_to_amdgpu_bo(gobj);
- (*bo)->parent = amdgpu_bo_ref(mem->bo);
-
- return 0;
-}
-
-static int
kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
struct amdgpu_bo **bo)
{
@@ -630,7 +782,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
*bo = gem_to_amdgpu_bo(gobj);
(*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
- (*bo)->parent = amdgpu_bo_ref(mem->bo);
return 0;
}
@@ -656,6 +807,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
uint64_t va = mem->va;
struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
struct amdgpu_bo *bo[2] = {NULL, NULL};
+ bool same_hive = false;
int i, ret;
if (!va) {
@@ -663,6 +815,24 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
return -EINVAL;
}
+ /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices
+ *
+ * The access path of MMIO and DOORBELL BOs is always over PCIe.
+ * In contrast the access path of VRAM BOs depends upon the type of
+ * link that connects the peer device. Access over PCIe is allowed
+ * if peer device has large BAR. In contrast, access over xGMI is
+ * allowed for both small and large BAR configurations of peer device
+ */
+ if ((adev != bo_adev) &&
+ ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+ (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+ if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
+ same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+ if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, adev))
+ return -EINVAL;
+ }
+
for (i = 0; i <= is_aql; i++) {
attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
if (unlikely(!attachment[i])) {
@@ -673,9 +843,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
va + bo_size, vm);
- if (adev == bo_adev ||
- (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
- (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && amdgpu_xgmi_same_hive(adev, bo_adev))) {
+ if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+ (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
+ same_hive) {
/* Mappings on the local GPU, or VRAM mappings in the
* local hive, or userptr mapping IOMMU direct map mode
* share the original BO
@@ -691,26 +861,30 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
} else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
/* Create an SG BO to DMA-map userptrs on other GPUs */
attachment[i]->type = KFD_MEM_ATT_USERPTR;
- ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
if (ret)
goto unwind;
- } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
- mem->bo->tbo.type != ttm_bo_type_sg) {
- /* GTT BOs use DMA-mapping ability of dynamic-attach
- * DMA bufs. TODO: The same should work for VRAM on
- * large-BAR GPUs.
- */
+ /* Handle DOORBELL BOs of peer devices and MMIO BOs of local and peer devices */
+ } else if (mem->bo->tbo.type == ttm_bo_type_sg) {
+ WARN_ONCE(!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL ||
+ mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP),
+ "Handing invalid SG BO in ATTACH request");
+ attachment[i]->type = KFD_MEM_ATT_SG;
+ ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+ if (ret)
+ goto unwind;
+	/* Enable access to GTT and VRAM BOs of peer devices */
+ } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT ||
+ mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
attachment[i]->type = KFD_MEM_ATT_DMABUF;
ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
if (ret)
goto unwind;
+ pr_debug("Employ DMABUF mechanism to enable peer GPU access\n");
} else {
- /* FIXME: Need to DMA-map other BO types:
- * large-BAR VRAM, doorbells, MMIO remap
- */
- attachment[i]->type = KFD_MEM_ATT_SHARED;
- bo[i] = mem->bo;
- drm_gem_object_get(&bo[i]->tbo.base);
+ WARN_ONCE(true, "Handling invalid ATTACH request");
+ ret = -EINVAL;
+ goto unwind;
}
/* Add BO to VM internal data structures */
@@ -1111,24 +1285,6 @@ update_gpuvm_pte_failed:
return ret;
}
-static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
-{
- struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-
- if (!sg)
- return NULL;
- if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
- kfree(sg);
- return NULL;
- }
- sg->sgl->dma_address = addr;
- sg->sgl->length = size;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->sgl->dma_length = size;
-#endif
- return sg;
-}
-
static int process_validate_vms(struct amdkfd_process_info *process_info)
{
struct amdgpu_vm *peer_vm;
@@ -1364,16 +1520,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdkfd_process_info *process_info = vm->process_info;
- struct amdgpu_bo *pd = vm->root.bo;
if (!process_info)
return;
- /* Release eviction fence from PD */
- amdgpu_bo_reserve(pd, false);
- amdgpu_bo_fence(pd, NULL, false);
- amdgpu_bo_unreserve(pd);
-
/* Update process info */
mutex_lock(&process_info->lock);
process_info->n_vms--;
@@ -1457,6 +1607,21 @@ out_unlock:
return ret;
}
+size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
+{
+ uint64_t reserved_for_pt =
+ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ size_t available;
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ available = adev->gmc.real_vram_size
+ - adev->kfd.vram_used_aligned
+ - atomic64_read(&adev->vram_pin_size)
+ - reserved_for_pt;
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN);
+}
+
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct amdgpu_device *adev, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
@@ -1497,7 +1662,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo_type = ttm_bo_type_sg;
if (size > UINT_MAX)
return -EINVAL;
- sg = create_doorbell_sg(*offset, size);
+ sg = create_sg_table(*offset, size);
if (!sg)
return -ENOMEM;
} else {
@@ -1591,7 +1756,7 @@ err_node_allow:
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
err_bo_create:
- unreserve_mem_limit(adev, size, flags);
+ amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
err_reserve_limit:
mutex_destroy(&(*mem)->lock);
if (gobj)
@@ -1612,6 +1777,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
{
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
+ bool use_release_notifier = (mem->bo->kfd_bo == mem);
struct kfd_mem_attachment *entry, *tmp;
struct bo_vm_reservation_context ctx;
struct ttm_validate_buffer *bo_list_entry;
@@ -1703,6 +1869,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
*/
drm_gem_object_put(&mem->bo->tbo.base);
+ /*
+ * For kgd_mem allocated in amdgpu_amdkfd_gpuvm_import_dmabuf(),
+ * explicitly free it here.
+ */
+ if (!use_release_notifier)
+ kfree(mem);
+
return ret;
}
@@ -1907,8 +2080,69 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
return ret;
}
-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
- struct kgd_mem *mem, void **kptr, uint64_t *size)
+/**
+ * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count
+ * @adev: Device to which allocated BO belongs
+ * @bo: Buffer object to be mapped
+ *
+ * Before returning, the bo reference count is incremented. To release the reference and unpin/
+ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem.
+ */
+int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
+{
+ int ret;
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto err_reserve_bo_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto err_pin_bo_failed;
+ }
+
+ ret = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (ret) {
+ pr_err("Failed to bind bo to GART. ret %d\n", ret);
+ goto err_map_bo_gart_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, bo->kfd_bo->process_info->eviction_fence);
+
+ amdgpu_bo_unreserve(bo);
+
+ bo = amdgpu_bo_ref(bo);
+
+ return 0;
+
+err_map_bo_gart_failed:
+ amdgpu_bo_unpin(bo);
+err_pin_bo_failed:
+ amdgpu_bo_unreserve(bo);
+err_reserve_bo_failed:
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel() - Map a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be mapped for CPU access
+ * @kptr[out]: pointer in kernel CPU address space
+ * @size[out]: size of the buffer
+ *
+ * Pins the BO and maps it for kernel CPU access. The eviction fence is removed
+ * from the BO, since pinned BOs cannot be evicted. The bo must remain on the
+ * validate_list, so the GPU mapping can be restored after a page table was
+ * evicted.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
+ void **kptr, uint64_t *size)
{
int ret;
struct amdgpu_bo *bo = mem->bo;
@@ -1959,8 +2193,15 @@ bo_reserve_failed:
return ret;
}
-void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct amdgpu_device *adev,
- struct kgd_mem *mem)
+/** amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel() - Unmap a GTT BO for kernel CPU access
+ *
+ * @mem: Buffer object to be unmapped for CPU access
+ *
+ * Removes the kernel CPU mapping and unpins the BO. It does not restore the
+ * eviction fence, so this function should only be used for cleanup before the
+ * BO is destroyed.
+ */
+void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
{
struct amdgpu_bo *bo = mem->bo;
@@ -2072,7 +2313,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
- r = kgd2kfd_quiesce_mm(mm);
+ r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(&process_info->restore_userptr_work,
@@ -2346,13 +2587,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
unlock_out:
mutex_unlock(&process_info->lock);
- mmput(mm);
- put_task_struct(usertask);
/* If validation failed, reschedule another attempt */
- if (evicted_bos)
+ if (evicted_bos) {
schedule_delayed_work(&process_info->restore_userptr_work,
msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+
+ kfd_smi_event_queue_restore_rescheduled(mm);
+ }
+ mmput(mm);
+ put_task_struct(usertask);
}
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
@@ -2654,3 +2898,22 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *
}
return false;
}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data)
+{
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ seq_printf(m, "System mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.system_mem_used >> 20),
+ (kfd_mem_limit.max_system_mem_limit >> 20));
+ seq_printf(m, "TTM mem used %lldM out of %lluM\n",
+ (kfd_mem_limit.ttm_mem_used >> 20),
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+
+ return 0;
+}
+
+#endif
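
The new amdgpu_amdkfd_get_available_memory() above reports how much VRAM KFD can still hand out: real VRAM minus the aligned KFD usage, minus pinned VRAM, minus the page-table estimate, rounded down to the reporting granularity. A minimal standalone sketch of that arithmetic, with made-up sizes and an illustrative 2 MiB granularity rather than the driver's actual VRAM_AVAILABLITY_ALIGN value:

#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((uint64_t)(a) - 1))	/* power-of-two a */

int main(void)
{
	uint64_t real_vram   = 16ULL << 30;	/* 16 GiB of VRAM on the board */
	uint64_t kfd_used    = 3ULL << 30;	/* already used through KFD (aligned) */
	uint64_t pinned      = 512ULL << 20;	/* pinned VRAM (vram_pin_size) */
	uint64_t pt_reserve  = 1ULL << 30;	/* stand-in for ESTIMATE_PT_SIZE() */
	uint64_t granularity = 2ULL << 20;	/* illustrative reporting granularity */

	uint64_t available = real_vram - kfd_used - pinned - pt_reserve;

	printf("available to KFD: %llu MiB\n",
	       (unsigned long long)(ALIGN_DOWN(available, granularity) >> 20));
	return 0;
}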
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index fd8f3731758e..b81b77a9efa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -314,7 +314,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
mem_channel_number = vram_info->v30.channel_num;
mem_channel_width = vram_info->v30.channel_width;
if (vram_width)
- *vram_width = mem_channel_number * mem_channel_width;
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
break;
default:
return -EINVAL;
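
The vram_info v3.0 table stores channel_width as a log2 value, which is why the corrected computation shifts instead of multiplying directly: with, say, 8 channels and channel_width = 6, the reported bus width becomes 8 * (1 << 6) = 512 bits, where the old expression would have reported 48. The channel count and width here are illustrative, not values from a particular board.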
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 714178f1b6c6..2168163aad2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
-
+ mutex_destroy(&list->bo_list_mutex);
kvfree(list);
}
@@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+ mutex_init(&list->bo_list_mutex);
*result = list;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 529d52a204cf..9caea1688fc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -47,6 +47,10 @@ struct amdgpu_bo_list {
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
+
+ /* Protect access during command submission.
+ */
+ struct mutex bo_list_mutex;
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b28af04b0c3e..b7bae833c804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
return r;
}
+ mutex_lock(&p->bo_list->bo_list_mutex);
+
/* One for TTM and one for the CS job */
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->tv.num_shared = 2;
@@ -651,6 +653,7 @@ out_free_user_pages:
kvfree(e->user_pages);
e->user_pages = NULL;
}
+ mutex_unlock(&p->bo_list->bo_list_mutex);
}
return r;
}
@@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
- if (error && backoff)
+ if (error && backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
+ mutex_unlock(&parser->bo_list->bo_list_mutex);
+ }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1278,6 +1283,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return 0;
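
Taken together, these command-submission hunks give bo_list_mutex a simple lifetime: it is acquired in amdgpu_cs_parser_bos() once the list is in hand, released on the user-page error path, released by amdgpu_cs_parser_fini() when reservation is backed off after an error, and released after the fences are attached in amdgpu_cs_submit(). A rough sketch of that bracketing, with validate_buffers() and submit_job() as placeholder names for the real parser steps:

/* Sketch only: how bo_list_mutex brackets one command submission. */
static int cs_ioctl_sketch(struct amdgpu_cs_parser *p)
{
	int r;

	mutex_lock(&p->bo_list->bo_list_mutex);	/* after the BO list is built */

	r = validate_buffers(p);		/* placeholder for parsing/validation */
	if (r) {
		mutex_unlock(&p->bo_list->bo_list_mutex);	/* error path */
		return r;
	}

	r = submit_job(p);			/* placeholder for job submission */
	mutex_unlock(&p->bo_list->bo_list_mutex);	/* normal completion */
	return r;
}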
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 7dc92ef36b2b..8ee4e8491f39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -110,7 +110,7 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
return -EACCES;
}
-static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_compute_prio(int32_t prio)
+static enum amdgpu_gfx_pipe_priority amdgpu_ctx_prio_to_gfx_pipe_prio(int32_t prio)
{
switch (prio) {
case AMDGPU_CTX_PRIORITY_HIGH:
@@ -143,8 +143,9 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
ctx->init_priority : ctx->override_priority;
switch (hw_ip) {
+ case AMDGPU_HW_IP_GFX:
case AMDGPU_HW_IP_COMPUTE:
- hw_prio = amdgpu_ctx_prio_to_compute_prio(ctx_prio);
+ hw_prio = amdgpu_ctx_prio_to_gfx_pipe_prio(ctx_prio);
break;
case AMDGPU_HW_IP_VCE:
case AMDGPU_HW_IP_VCN_ENC:
@@ -271,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
return res;
}
-static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
- struct drm_file *filp, struct amdgpu_ctx *ctx)
-{
- int r;
-
- r = amdgpu_ctx_priority_permit(filp, priority);
- if (r)
- return r;
-
- memset(ctx, 0, sizeof(*ctx));
-
- kref_init(&ctx->refcount);
- ctx->mgr = mgr;
- spin_lock_init(&ctx->ring_lock);
- mutex_init(&ctx->lock);
-
- ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
- ctx->reset_counter_query = ctx->reset_counter;
- ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
- ctx->init_priority = priority;
- ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
- ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
-
- return 0;
-}
-
static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
u32 *stable_pstate)
{
@@ -325,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
return 0;
}
+static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
+ struct drm_file *filp, struct amdgpu_ctx *ctx)
+{
+ u32 current_stable_pstate;
+ int r;
+
+ r = amdgpu_ctx_priority_permit(filp, priority);
+ if (r)
+ return r;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ kref_init(&ctx->refcount);
+ ctx->mgr = mgr;
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
+
+ ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
+ ctx->reset_counter_query = ctx->reset_counter;
+ ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
+ ctx->init_priority = priority;
+ ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
+
+ r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+ if (r)
+ return r;
+
+ ctx->stable_pstate = current_stable_pstate;
+
+ return 0;
+}
+
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
u32 stable_pstate)
{
@@ -396,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
}
if (drm_dev_enter(&adev->ddev, &idx)) {
- amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
+ amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
drm_dev_exit(idx);
}
@@ -779,7 +786,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
amdgpu_ctx_to_drm_sched_prio(priority));
/* set hw priority */
- if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
+ if (hw_ip == AMDGPU_HW_IP_COMPUTE || hw_ip == AMDGPU_HW_IP_GFX) {
hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index eedb12f6b8a3..cb00c7d6f50b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -383,12 +383,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
value = RREG32_PCIE(*pos);
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -396,11 +392,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -441,12 +438,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
WREG32_PCIE(*pos, value);
@@ -456,11 +449,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -502,12 +496,8 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
value = RREG32_DIDT(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -515,11 +505,12 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -560,12 +551,8 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
WREG32_DIDT(*pos >> 2, value);
@@ -575,11 +562,12 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -621,12 +609,8 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
value = RREG32_SMC(*pos);
r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -634,11 +618,12 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -679,12 +664,8 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- amdgpu_virt_disable_access_debugfs(adev);
- return r;
- }
+ if (r)
+ goto out;
WREG32_SMC(*pos, value);
@@ -694,11 +675,12 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
-
amdgpu_virt_disable_access_debugfs(adev);
- return result;
+ return r;
}
/**
@@ -1090,11 +1072,8 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
uint32_t value;
r = get_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ if (r)
+ goto out;
amdgpu_gfx_off_ctrl(adev, value ? true : false);
@@ -1104,10 +1083,12 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return result;
+ return r;
}
@@ -1136,21 +1117,52 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
}
while (size) {
- uint32_t value;
+ u32 value = adev->gfx.gfx_off_state;
+
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
+
+ result += 4;
+ buf += 4;
+ *pos += 4;
+ size -= 4;
+ }
+
+ r = result;
+out:
+ pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+ return r;
+}
+
+static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ ssize_t result = 0;
+ int r;
+
+ if (size & 0x3 || *pos & 0x3)
+ return -EINVAL;
+
+ r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+ return r;
+ }
+
+ while (size) {
+ u32 value;
r = amdgpu_get_gfx_off_status(adev, &value);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ if (r)
+ goto out;
- r = put_user(value, (uint32_t *)buf);
- if (r) {
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return r;
- }
+ r = put_user(value, (u32 *)buf);
+ if (r)
+ goto out;
result += 4;
buf += 4;
@@ -1158,10 +1170,12 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
size -= 4;
}
+ r = result;
+out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- return result;
+ return r;
}
static const struct file_operations amdgpu_debugfs_regs2_fops = {
@@ -1229,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
.llseek = default_llseek
};
+static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_debugfs_gfxoff_status_read,
+ .llseek = default_llseek
+};
+
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs2_fops,
@@ -1240,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_wave_fops,
&amdgpu_debugfs_gpr_fops,
&amdgpu_debugfs_gfxoff_fops,
+ &amdgpu_debugfs_gfxoff_status_fops,
};
static const char *debugfs_regs_names[] = {
@@ -1253,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
"amdgpu_wave",
"amdgpu_gpr",
"amdgpu_gfxoff",
+ "amdgpu_gfxoff_status",
};
/**
@@ -1683,7 +1705,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
{
struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
char reg_offset[11];
- uint32_t *new, *tmp = NULL;
+ uint32_t *new = NULL, *tmp = NULL;
int ret, i = 0, len = 0;
do {
@@ -1709,17 +1731,25 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
i++;
} while (len < size);
+ new = kmalloc_array(i, sizeof(uint32_t), GFP_KERNEL);
+ if (!new) {
+ ret = -ENOMEM;
+ goto error_free;
+ }
ret = down_write_killable(&adev->reset_domain->sem);
if (ret)
goto error_free;
swap(adev->reset_dump_reg_list, tmp);
+ swap(adev->reset_dump_reg_value, new);
adev->num_regs = i;
up_write(&adev->reset_domain->sem);
ret = size;
error_free:
- kfree(tmp);
+ if (tmp != new)
+ kfree(tmp);
+ kfree(new);
return ret;
}
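
All of the debugfs accessors above are reshaped the same way: any put_user()/get_user() failure jumps to one cleanup label, and on success the byte count is copied into r first so a single return statement covers both outcomes. A condensed sketch of the pattern, with runtime_pm_get()/runtime_pm_put() and read_register() as placeholders for the real pm_runtime and register helpers:

/* Sketch of the consolidated error handling used by the debugfs read handlers. */
static ssize_t regs_read_sketch(struct file *f, char __user *buf,
				size_t size, loff_t *pos)
{
	ssize_t result = 0;
	int r;

	r = runtime_pm_get(f);			/* placeholder for pm_runtime_get_sync() */
	if (r < 0)
		return r;

	while (size) {
		uint32_t value = read_register(*pos);	/* placeholder register read */

		r = put_user(value, (uint32_t *)buf);
		if (r)
			goto out;		/* single cleanup path on failure */

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	r = result;				/* success: return the byte count */
out:
	runtime_pm_put(f);			/* placeholder for the pm_runtime release */
	return r;
}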
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 58df107e3beb..1400abee9f40 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -32,6 +32,9 @@
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
+#include <linux/devcoredump.h>
+#include <generated/utsrelease.h>
+#include <linux/pci-p2pdma.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
@@ -1942,35 +1945,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
}
switch (adev->asic_type) {
-#ifdef CONFIG_DRM_AMDGPU_SI
- case CHIP_VERDE:
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_OLAND:
- case CHIP_HAINAN:
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_MULLINS:
-#endif
- case CHIP_TOPAZ:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_VEGA20:
- case CHIP_ALDEBARAN:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
default:
return 0;
case CHIP_VEGA10:
@@ -2482,12 +2456,14 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (!hive->reset_domain ||
!amdgpu_reset_get_reset_domain(hive->reset_domain)) {
r = -ENOENT;
+ amdgpu_put_xgmi_hive(hive);
goto init_failed;
}
/* Drop the early temporary reset domain we created for device */
amdgpu_reset_put_reset_domain(adev->reset_domain);
adev->reset_domain = hive->reset_domain;
+ amdgpu_put_xgmi_hive(hive);
}
}
@@ -3316,38 +3292,12 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_MULLINS:
/*
* We have systems in the wild with these ASICs that require
- * LVDS and VGA support which is not supported with DC.
+ * VGA support which is not supported with DC.
*
* Fallback to the non-DC driver here by default so as not to
* cause regressions.
*/
return amdgpu_dc > 0;
- case CHIP_HAWAII:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN)
- case CHIP_RAVEN:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_RENOIR:
- case CHIP_CYAN_SKILLFISH:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_BEIGE_GOBY:
- case CHIP_VANGOGH:
- case CHIP_YELLOW_CARP:
-#endif
default:
return amdgpu_dc != 0;
#else
@@ -3369,7 +3319,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
*/
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
{
- if (amdgpu_sriov_vf(adev) ||
+ if (amdgpu_sriov_vf(adev) ||
adev->enable_virtual_display ||
(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
return false;
@@ -3667,14 +3617,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_mcbp)
DRM_INFO("MCBP is enabled\n");
- if (adev->asic_type >= CHIP_NAVI10) {
- if (amdgpu_mes || amdgpu_mes_kiq)
- adev->enable_mes = true;
-
- if (amdgpu_mes_kiq)
- adev->enable_mes_kiq = true;
- }
-
/*
* Reset domain needs to be present early, before XGMI hive discovered
	 * (if any) and initialized to use reset sem and in_gpu reset flag
@@ -4473,8 +4415,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
retry:
amdgpu_amdkfd_pre_reset(adev);
- amdgpu_amdkfd_pre_reset(adev);
-
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
@@ -4666,6 +4606,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
amdgpu_virt_fini_data_exchange(adev);
}
+ amdgpu_fence_driver_isr_toggle(adev, true);
+
/* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -4681,6 +4623,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
amdgpu_fence_driver_force_completion(ring);
}
+ amdgpu_fence_driver_isr_toggle(adev, false);
+
if (job && job->vm)
drm_sched_increase_karma(&job->base);
@@ -4721,20 +4665,72 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
{
- uint32_t reg_value;
int i;
lockdep_assert_held(&adev->reset_domain->sem);
- dump_stack();
for (i = 0; i < adev->num_regs; i++) {
- reg_value = RREG32(adev->reset_dump_reg_list[i]);
- trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value);
+ adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
+ trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
+ adev->reset_dump_reg_value[i]);
}
return 0;
}
+#ifdef CONFIG_DEV_COREDUMP
+static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
+ size_t count, void *data, size_t datalen)
+{
+ struct drm_printer p;
+ struct amdgpu_device *adev = data;
+ struct drm_print_iterator iter;
+ int i;
+
+ iter.data = buffer;
+ iter.offset = 0;
+ iter.start = offset;
+ iter.remain = count;
+
+ p = drm_coredump_printer(&iter);
+
+ drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+ drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
+ if (adev->reset_task_info.pid)
+ drm_printf(&p, "process_name: %s PID: %d\n",
+ adev->reset_task_info.process_name,
+ adev->reset_task_info.pid);
+
+ if (adev->reset_vram_lost)
+ drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+ if (adev->num_regs) {
+ drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
+
+ for (i = 0; i < adev->num_regs; i++)
+ drm_printf(&p, "0x%08x: 0x%08x\n",
+ adev->reset_dump_reg_list[i],
+ adev->reset_dump_reg_value[i]);
+ }
+
+ return count - iter.remain;
+}
+
+static void amdgpu_devcoredump_free(void *data)
+{
+}
+
+static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+
+ ktime_get_ts64(&adev->reset_time);
+ dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+}
+#endif
+
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context)
{
@@ -4746,6 +4742,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_reset_reg_dumps(tmp_adev);
+
+ reset_context->reset_device_list = device_list_handle;
r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
/* If reset handler not implemented, continue; otherwise return */
if (r == -ENOSYS)
@@ -4819,6 +4817,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
+#ifdef CONFIG_DEV_COREDUMP
+ tmp_adev->reset_vram_lost = vram_lost;
+ memset(&tmp_adev->reset_task_info, 0,
+ sizeof(tmp_adev->reset_task_info));
+ if (reset_context->job && reset_context->job->vm)
+ tmp_adev->reset_task_info =
+ reset_context->job->vm->task_info;
+ amdgpu_reset_capture_coredumpm(tmp_adev);
+#endif
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU reset!\n");
amdgpu_inc_vram_lost(tmp_adev);
@@ -5004,16 +5011,32 @@ static void amdgpu_device_recheck_guilty_jobs(
		/* clear the job's guilty flag and depend on the following step to decide the real one */
drm_sched_reset_karma(s_job);
- /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get
- * to make sure fence is balanced */
- dma_fence_get(s_job->s_fence->parent);
drm_sched_resubmit_jobs_ext(&ring->sched, 1);
+ if (!s_job->s_fence->parent) {
+ DRM_WARN("Failed to get a HW fence for job!");
+ continue;
+ }
+
ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
if (ret == 0) { /* timeout */
DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
ring->sched.name, s_job->id);
+
+ amdgpu_fence_driver_isr_toggle(adev, true);
+
+ /* Clear this failed job from fence array */
+ amdgpu_fence_driver_clear_job_fences(ring);
+
+ amdgpu_fence_driver_isr_toggle(adev, false);
+
+ /* Since the job won't signal and we go for
+			 * another resubmit, drop this parent pointer
+ */
+ dma_fence_put(s_job->s_fence->parent);
+ s_job->s_fence->parent = NULL;
+
/* set guilty */
drm_sched_increase_karma(s_job);
retry:
@@ -5042,7 +5065,6 @@ retry:
/* got the hw fence, signal finished fence */
atomic_dec(ring->sched.score);
- dma_fence_put(s_job->s_fence->parent);
dma_fence_get(&s_job->s_fence->finished);
dma_fence_signal(&s_job->s_fence->finished);
dma_fence_put(&s_job->s_fence->finished);
@@ -5055,8 +5077,29 @@ retry:
}
}
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+#if defined(CONFIG_DEBUG_FS)
+ if (!amdgpu_sriov_vf(adev))
+ cancel_work(&adev->reset_work);
+#endif
+
+ if (adev->kfd.dev)
+ cancel_work(&adev->kfd.reset_work);
+
+ if (amdgpu_sriov_vf(adev))
+ cancel_work(&adev->virt.flr_work);
+
+ if (con && adev->ras_enabled)
+ cancel_work(&con->recovery_work);
+
+}
+
+
/**
- * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler
+ * amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu_device pointer
* @job: which job trigger hang
@@ -5066,8 +5109,9 @@ retry:
* Returns 0 for success or an error on failure.
*/
-int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
- struct amdgpu_job *job)
+int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ struct amdgpu_job *job,
+ struct amdgpu_reset_context *reset_context)
{
struct list_head device_list, *device_list_handle = NULL;
bool job_signaled = false;
@@ -5077,9 +5121,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
bool need_emergency_restart = false;
bool audio_suspended = false;
int tmp_vram_lost_counter;
- struct amdgpu_reset_context reset_context;
-
- memset(&reset_context, 0, sizeof(reset_context));
/*
* Special case: RAS triggered and full reset isn't supported
@@ -5105,12 +5146,8 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
if (hive)
mutex_lock(&hive->hive_lock);
- reset_context.method = AMD_RESET_METHOD_NONE;
- reset_context.reset_req_dev = adev;
- reset_context.job = job;
- reset_context.hive = hive;
- clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
-
+ reset_context->job = job;
+ reset_context->hive = hive;
/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
@@ -5194,8 +5231,7 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
*
* job->base holds a reference to parent fence
*/
- if (job && job->base.s_fence->parent &&
- dma_fence_is_signaled(job->base.s_fence->parent)) {
+ if (job && dma_fence_is_signaled(&job->hw_fence)) {
job_signaled = true;
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@@ -5203,13 +5239,19 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
- r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
+ r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
/*TODO Should we stop ?*/
if (r) {
dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
r, adev_to_drm(tmp_adev)->unique);
tmp_adev->asic_reset_res = r;
}
+
+ /*
+		 * Drop all pending non-scheduler resets. Scheduler resets
+ * were already dropped during drm_sched_stop
+ */
+ amdgpu_device_stop_pending_resets(tmp_adev);
}
tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
@@ -5224,7 +5266,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
amdgpu_ras_resume(adev);
} else {
- r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
+ r = amdgpu_do_asic_reset(device_list_handle, reset_context);
if (r && r == -EAGAIN)
goto retry;
}
@@ -5244,7 +5286,7 @@ skip_hw_reset:
if (amdgpu_gpu_recovery == 2 &&
!(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
amdgpu_device_recheck_guilty_jobs(
- tmp_adev, device_list_handle, &reset_context);
+ tmp_adev, device_list_handle, reset_context);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
@@ -5259,6 +5301,9 @@ skip_hw_reset:
drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
}
+ if (adev->enable_mes)
+ amdgpu_mes_self_test(tmp_adev);
+
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
}
@@ -5308,38 +5353,9 @@ skip_sched_resume:
if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
- return r;
-}
-
-struct amdgpu_recover_work_struct {
- struct work_struct base;
- struct amdgpu_device *adev;
- struct amdgpu_job *job;
- int ret;
-};
-
-static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
-{
- struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
-
- recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
-}
-/*
- * Serialize gpu recover into reset domain single threaded wq
- */
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job *job)
-{
- struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
-
- INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
-
- if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
- return -EAGAIN;
- flush_work(&work.base);
-
- return work.ret;
+ atomic_set(&adev->reset_domain->reset_res, r);
+ return r;
}
/**
@@ -5490,6 +5506,37 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
+ *
+ * Return true if @peer_adev can access (DMA) @adev through the PCIe
+ * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
+ * @peer_adev.
+ */
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+#ifdef CONFIG_HSA_AMD_P2P
+ uint64_t address_mask = peer_adev->dev->dma_mask ?
+ ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+ resource_size_t aper_limit =
+ adev->gmc.aper_base + adev->gmc.aper_size - 1;
+ bool p2p_access = !adev->gmc.xgmi.connected_to_cpu &&
+ !(pci_p2pdma_distance_many(adev->pdev,
+ &peer_adev->dev, 1, true) < 0);
+
+ return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
+ adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
+ !(adev->gmc.aper_base & address_mask ||
+ aper_limit & address_mask));
+#else
+ return false;
+#endif
+}
+
int amdgpu_device_baco_enter(struct drm_device *dev)
{
struct amdgpu_device *adev = drm_to_adev(dev);
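
amdgpu_device_is_peer_accessible() above only answers true when the whole of VRAM is CPU-visible (large BAR), the aperture lies within the peer's DMA mask, pci_p2pdma reports a usable path, and the pcie_p2p module parameter has not disabled it. A small standalone sketch of just the address-mask portion of that test, with made-up BAR and mask values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: can a peer with the given DMA mask reach this BAR? */
static bool bar_within_dma_mask(uint64_t aper_base, uint64_t aper_size,
				uint64_t peer_dma_mask)
{
	uint64_t address_mask = ~peer_dma_mask;		/* bits the peer cannot drive */
	uint64_t aper_limit = aper_base + aper_size - 1;

	return !(aper_base & address_mask) && !(aper_limit & address_mask);
}

int main(void)
{
	/* 16 GiB BAR at 256 GiB, peer limited to 40-bit DMA addresses */
	printf("%d\n", bar_within_dma_mask(256ULL << 30, 16ULL << 30,
					   (1ULL << 40) - 1));
	return 0;
}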
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 47f0344205ed..95d34590cad1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -194,6 +194,7 @@ static int hw_id_map[MAX_HWIP] = {
[UMC_HWIP] = UMC_HWID,
[XGMI_HWIP] = XGMI_HWID,
[DCI_HWIP] = DCI_HWID,
+ [PCIE_HWIP] = PCIE_HWID,
};
static int amdgpu_discovery_read_binary_from_vram(struct amdgpu_device *adev, uint8_t *binary)
@@ -1435,6 +1436,11 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
return -EINVAL;
}
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * which is smaller than VCN_INFO_TABLE_MAX_NUM_INSTANCES
+ * but that may change in the future with new GPUs so keep this
+ * check for defensive purposes.
+ */
if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
dev_err(adev->dev, "invalid vcn instances\n");
return -EINVAL;
@@ -1450,6 +1456,9 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
case 1:
+ /* num_vcn_inst is currently limited to AMDGPU_MAX_VCN_INSTANCES
+ * so this won't overflow.
+ */
for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
adev->vcn.vcn_codec_disable_mask[v] =
le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
@@ -1621,12 +1630,14 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 1):
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 3):
- case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 5):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 8):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
break;
+ case IP_VERSION(13, 0, 4):
+ amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block);
+ break;
default:
dev_err(adev->dev,
"Failed to add psp ip block(MP0_HWIP:0x%x)\n",
@@ -1707,8 +1718,11 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
+ case IP_VERSION(3, 1, 4):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
amdgpu_device_ip_block_add(adev, &dm_ip_block);
break;
default:
@@ -1886,6 +1900,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
break;
case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
case IP_VERSION(4, 0, 4):
amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
@@ -2194,12 +2209,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(7, 4, 0):
case IP_VERSION(7, 4, 1):
- adev->nbio.funcs = &nbio_v7_4_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
- break;
case IP_VERSION(7, 4, 4):
adev->nbio.funcs = &nbio_v7_4_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald;
+ adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
break;
case IP_VERSION(7, 2, 0):
case IP_VERSION(7, 2, 1):
@@ -2213,15 +2225,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 3, 1):
case IP_VERSION(2, 3, 2):
- adev->nbio.funcs = &nbio_v2_3_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
- break;
case IP_VERSION(3, 3, 0):
case IP_VERSION(3, 3, 1):
case IP_VERSION(3, 3, 2):
case IP_VERSION(3, 3, 3):
adev->nbio.funcs = &nbio_v2_3_funcs;
- adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc;
+ adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
break;
case IP_VERSION(4, 3, 0):
case IP_VERSION(4, 3, 1):
@@ -2321,6 +2330,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
switch (adev->ip_versions[LSDMA_HWIP][0]) {
case IP_VERSION(6, 0, 0):
+ case IP_VERSION(6, 0, 1):
case IP_VERSION(6, 0, 2):
adev->lsdma.funcs = &lsdma_v6_0_funcs;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 17c9bbe0cbc5..c20922a5af9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -30,6 +30,9 @@
#include "atom.h"
#include "amdgpu_connectors.h"
#include "amdgpu_display.h"
+#include "soc15_common.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
#include <asm/div64.h>
#include <linux/pci.h>
@@ -663,6 +666,11 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
uint64_t modifier = 0;
+ int num_pipes = 0;
+ int num_pkrs = 0;
+
+ num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs;
+ num_pipes = adev->gfx.config.gb_addr_config_fields.num_pipes;
if (!afb->tiling_flags || !AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE)) {
modifier = DRM_FORMAT_MOD_LINEAR;
@@ -675,7 +683,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
int bank_xor_bits = 0;
int packers = 0;
int rb = 0;
- int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes);
+ int pipes = ilog2(num_pipes);
uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
switch (swizzle >> 2) {
@@ -691,12 +699,17 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 6: /* 64 KiB _X */
block_size_bits = 16;
break;
+ case 7: /* 256 KiB */
+ block_size_bits = 18;
+ break;
default:
/* RESERVED or VAR */
return -EINVAL;
}
- if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
+ version = AMD_FMT_MOD_TILE_VER_GFX11;
+ else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS;
else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0))
version = AMD_FMT_MOD_TILE_VER_GFX10;
@@ -707,19 +720,32 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
case 0: /* Z microtiling */
return -EINVAL;
case 1: /* S microtiling */
- if (!has_xor)
- version = AMD_FMT_MOD_TILE_VER_GFX9;
+ if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
+ if (!has_xor)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
break;
case 2:
- if (!has_xor && afb->base.format->cpp[0] != 4)
- version = AMD_FMT_MOD_TILE_VER_GFX9;
+ if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) {
+ if (!has_xor && afb->base.format->cpp[0] != 4)
+ version = AMD_FMT_MOD_TILE_VER_GFX9;
+ }
break;
case 3:
break;
}
if (has_xor) {
+ if (num_pipes == num_pkrs && num_pkrs == 0) {
+ DRM_ERROR("invalid number of pipes and packers\n");
+ return -EINVAL;
+ }
+
switch (version) {
+ case AMD_FMT_MOD_TILE_VER_GFX11:
+ pipe_xor_bits = min(block_size_bits - 8, pipes);
+ packers = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs);
+ break;
case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
pipe_xor_bits = min(block_size_bits - 8, pipes);
packers = min(block_size_bits - 8 - pipe_xor_bits,
@@ -753,9 +779,10 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
u64 render_dcc_offset;
/* Enable constant encode on RAVEN2 and later. */
- bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN ||
+ bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN ||
(adev->asic_type == CHIP_RAVEN &&
- adev->external_rev_id >= 0x81);
+ adev->external_rev_id >= 0x81)) &&
+ adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0);
int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B :
dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B :
@@ -870,10 +897,11 @@ static unsigned int get_dcc_block_size(uint64_t modifier, bool rb_aligned,
return max(10 + (rb_aligned ? (int)AMD_FMT_MOD_GET(RB, modifier) : 0), 12);
}
case AMD_FMT_MOD_TILE_VER_GFX10:
- case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: {
+ case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS:
+ case AMD_FMT_MOD_TILE_VER_GFX11: {
int pipes_log2 = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier);
- if (ver == AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
+ if (ver >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && pipes_log2 > 1 &&
AMD_FMT_MOD_GET(PACKERS, modifier) == pipes_log2)
++pipes_log2;
@@ -966,6 +994,9 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb)
case DC_SW_64KB_S_X:
block_size_log2 = 16;
break;
+ case DC_SW_VAR_S_X:
+ block_size_log2 = 18;
+ break;
default:
drm_dbg_kms(rfb->base.dev,
"Swizzle mode with unknown block size: %d\n", swizzle);
@@ -1528,6 +1559,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
stime, etime, mode);
}
+static bool
+amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+ if (!fb_helper || !fb_helper->buffer)
+ return false;
+
+ if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
+ return false;
+
+ return true;
+}
+
int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
@@ -1563,10 +1609,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
- r = amdgpu_bo_reserve(robj, true);
- if (r == 0) {
- amdgpu_bo_unpin(robj);
- amdgpu_bo_unreserve(robj);
+ if (!amdgpu_display_robj_is_fb(adev, robj)) {
+ r = amdgpu_bo_reserve(robj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(robj);
+ amdgpu_bo_unreserve(robj);
+ }
}
}
return 0;
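
For the GFX11 path added here the XOR-bit math stays close to the GFX10.3 one: with 64 KiB _X swizzle blocks (block_size_bits = 16) and 8 pipes, pipe_xor_bits = min(16 - 8, ilog2(8)) = 3, while packers now comes straight from ilog2(num_pkrs); the new 256 KiB swizzle case only raises block_size_bits to 18. The pipe and packer counts in this example are illustrative rather than taken from a specific ASIC.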
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 7b6d83e2b13c..560352f7c317 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -35,8 +35,6 @@
#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
-int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
uint64_t bo_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8890300766a5..429fcdf28836 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -80,7 +80,7 @@
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
- * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
* - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
@@ -100,10 +100,11 @@
* - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
* - 3.45.0 - Add context ioctl stable pstate interface
* - 3.46.0 - To enable hot plug amdgpu tests in libdrm
- * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
+ * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
+ * - 3.48.0 - Add IP discovery version info to HW INFO
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 47
+#define KMS_DRIVER_MINOR 48
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit;
@@ -167,6 +168,7 @@ int amdgpu_smu_pptable_id = -1;
*/
uint amdgpu_dc_feature_mask = 2;
uint amdgpu_dc_debug_mask;
+uint amdgpu_dc_visual_confirm;
int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp;
int amdgpu_discovery = -1;
@@ -803,6 +805,16 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm
#endif
/**
+ * DOC: pcie_p2p (bool)
+ * Enable PCIe P2P (requires large-BAR). Default value: true (on)
+ */
+#ifdef CONFIG_HSA_AMD_P2P
+bool pcie_p2p = true;
+module_param(pcie_p2p, bool, 0444);
+MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = on(default))");
+#endif
+
+/**
* DOC: dcfeaturemask (uint)
* Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
* The default is the current set of stable display features.
@@ -817,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
+MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)");
+module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
+
/**
* DOC: abmlevel (uint)
* Override the default ABM (Adaptive Backlight Management) level used for DC
@@ -2111,7 +2126,7 @@ retry_init:
if (ret)
DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
- if (adev->runpm) {
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
/* only need to skip on ATPX */
if (amdgpu_device_supports_px(ddev))
dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
@@ -2168,7 +2183,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
drm_dev_unplug(dev);
- if (adev->runpm) {
+ if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
pm_runtime_get_sync(dev->dev);
pm_runtime_forbid(dev->dev);
}
@@ -2451,7 +2466,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret, i;
- if (!adev->runpm) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}
@@ -2520,7 +2535,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret;
- if (!adev->runpm)
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
return -EINVAL;
/* Avoids registers access if device is physically gone */
@@ -2564,7 +2579,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
/* we don't want the main rpm_idle to call suspend - we want to autosuspend */
int ret = 1;
- if (!adev->runpm) {
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d16c8c1f72db..8adeb7469f1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -39,6 +39,7 @@
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
/*
* Fences
@@ -46,7 +47,7 @@
* for GPU/CPU synchronization. When the fence is written,
* it is expected that all buffers associated with that fence
* are no longer in use by the associated ring on the GPU and
- * that the the relevant GPU caches have been flushed.
+ * that the relevant GPU caches have been flushed.
*/
struct amdgpu_fence {
@@ -163,11 +164,16 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
if (job && job->job_run_counter) {
/* reinit seq for resubmitted jobs */
fence->seqno = seq;
+		/* To be in line with external fence creation and other drivers */
+ dma_fence_get(fence);
} else {
- if (job)
+ if (job) {
dma_fence_init(fence, &amdgpu_job_fence_ops,
&ring->fence_drv.lock,
adev->fence_context + ring->idx, seq);
+ /* Against remove in amdgpu_job_{free, free_cb} */
+ dma_fence_get(fence);
+ }
else
dma_fence_init(fence, &amdgpu_fence_ops,
&ring->fence_drv.lock,
@@ -531,6 +537,24 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
}
}
+/* Will either stop and flush handlers for the amdgpu interrupt or re-enable it */
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop)
+{
+ int i;
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+
+ if (!ring || !ring->fence_drv.initialized || !ring->fence_drv.irq_src)
+ continue;
+
+ if (stop)
+ disable_irq(adev->irq.irq);
+ else
+ enable_irq(adev->irq.irq);
+ }
+}
+
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev)
{
unsigned int i, j;
@@ -594,8 +618,10 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
ptr = &ring->fence_drv.fences[i];
old = rcu_dereference_protected(*ptr, 1);
- if (old && old->ops == &amdgpu_job_fence_ops)
+ if (old && old->ops == &amdgpu_job_fence_ops) {
RCU_INIT_POINTER(*ptr, NULL);
+ dma_fence_put(old);
+ }
}
}
@@ -798,7 +824,10 @@ static int gpu_recover_get(void *data, u64 *val)
return 0;
}
- *val = amdgpu_device_gpu_recover(adev, NULL);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+ flush_work(&adev->reset_work);
+
+ *val = atomic_read(&adev->reset_domain->reset_res);
pm_runtime_mark_last_busy(dev->dev);
pm_runtime_put_autosuspend(dev->dev);
@@ -810,6 +839,21 @@ DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
"%lld\n");
+static void amdgpu_debugfs_reset_work(struct work_struct *work)
+{
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+ reset_work);
+
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+}
+
#endif
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
@@ -821,9 +865,12 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
debugfs_create_file("amdgpu_fence_info", 0444, root, adev,
&amdgpu_debugfs_fence_info_fops);
- if (!amdgpu_sriov_vf(adev))
+ if (!amdgpu_sriov_vf(adev)) {
+
+ INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
&amdgpu_debugfs_gpu_recover_fops);
+ }
#endif
}
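
The debugfs amdgpu_gpu_recover hook now drives recovery through the reset domain instead of calling the recovery function directly: it queues adev->reset_work (which builds a reset context and calls amdgpu_device_gpu_recover()), waits for it, and reports the result cached in reset_domain->reset_res. A condensed sketch of that sequence, trimmed from the hunks above:

/* Sketch: debugfs-triggered recovery, serialized through the reset domain. */
static int gpu_recover_sketch(struct amdgpu_device *adev, u64 *val)
{
	/* adev->reset_work is assumed to call amdgpu_device_gpu_recover() */
	if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
		flush_work(&adev->reset_work);	/* wait for the serialized reset */

	*val = atomic_read(&adev->reset_domain->reset_res);
	return 0;
}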
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index ecada5eadfe3..e325150879df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -66,10 +66,15 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
return true;
case CHIP_SIENNA_CICHLID:
if (strnstr(atom_ctx->vbios_version, "D603",
+ sizeof(atom_ctx->vbios_version))) {
+ if (strnstr(atom_ctx->vbios_version, "D603GLXE",
sizeof(atom_ctx->vbios_version)))
- return true;
- else
+ return false;
+ else
+ return true;
+ } else {
return false;
+ }
default:
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 16699158e00d..222d3d7ea076 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -142,7 +142,12 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s
}
}
-static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
+static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
+{
+ return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
+}
+
+static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
if (amdgpu_compute_multipipe != -1) {
DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
@@ -158,6 +163,28 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
return adev->gfx.mec.num_mec > 1;
}
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ int queue = ring->queue;
+ int pipe = ring->pipe;
+
+ /* Policy: use pipe1 queue0 as high priority graphics queue if we
+ * have more than one gfx pipe.
+ */
+ if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
+ adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
+ int me = ring->me;
+ int bit;
+
+ bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
+ if (ring == &adev->gfx.gfx_ring[bit])
+ return true;
+ }
+
+ return false;
+}
+
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@@ -174,7 +201,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, pipe;
- bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+ bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe,
adev->gfx.num_compute_rings);
@@ -200,18 +227,24 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
- int i, queue, me;
-
- for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
- queue = i % adev->gfx.me.num_queue_per_pipe;
- me = (i / adev->gfx.me.num_queue_per_pipe)
- / adev->gfx.me.num_pipe_per_me;
+ int i, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
+ int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
+ adev->gfx.me.num_queue_per_pipe;
- if (me >= adev->gfx.me.num_me)
- break;
+ if (multipipe_policy) {
/* policy: amdgpu owns the first queue per pipe at this stage
* will extend to multiple queues per pipe later */
- if (me == 0 && queue < 1)
+ for (i = 0; i < max_queues_per_me; i++) {
+ pipe = i % adev->gfx.me.num_pipe_per_me;
+ queue = (i / adev->gfx.me.num_pipe_per_me) %
+ adev->gfx.me.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
+ adev->gfx.me.queue_bitmap);
+ }
+ } else {
+ for (i = 0; i < max_queues_per_me; ++i)
set_bit(i, adev->gfx.me.queue_bitmap);
}
@@ -666,6 +699,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
if (amdgpu_device_skip_hw_access(adev))
return 0;
+ if (adev->mes.ring.sched.ready)
+ return amdgpu_mes_rreg(adev, reg);
+
BUG_ON(!ring->funcs->emit_rreg);
spin_lock_irqsave(&kiq->ring_lock, flags);
@@ -733,6 +769,11 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
if (amdgpu_device_skip_hw_access(adev))
return;
+ if (adev->mes.ring.sched.ready) {
+ amdgpu_mes_wreg(adev, reg, v);
+ return;
+ }
+
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
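A minimal illustrative sketch, not part of the patch above: with the graphics multipipe policy in place, only the ring sitting on pipe 1, queue 0 reports as high priority. The example_ helper name is hypothetical.

static void example_dump_gfx_ring_prio(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.gfx_ring[i];

		/* true only for pipe 1 / queue 0 when multipipe is enabled */
		DRM_INFO("gfx ring %d (pipe %d, queue %d) high prio: %d\n",
			 i, ring->pipe, ring->queue,
			 amdgpu_gfx_is_high_priority_graphics_queue(adev, ring));
	}
}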
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 53526ffb2ce1..23a696d38390 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -396,6 +396,8 @@ bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
int pipe, int queue);
bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
+bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 3df146579ad9..1d5af50331e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -242,7 +242,7 @@ restart_ih:
* @entry: IV entry
*
* Decodes the interrupt vector at the current rptr
- * position and also advance the position for for Vega10
+ * position and also advance the position for Vega10
* and later GPUs.
*/
void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
index 56cf127cdf93..484e936812e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_imu.h
@@ -24,12 +24,18 @@
#ifndef __AMDGPU_IMU_H__
#define __AMDGPU_IMU_H__
+enum imu_work_mode {
+ DEBUG_MODE,
+ MISSION_MODE
+};
+
struct amdgpu_imu_funcs {
int (*init_microcode)(struct amdgpu_device *adev);
int (*load_microcode)(struct amdgpu_device *adev);
void (*setup_imu)(struct amdgpu_device *adev);
int (*start_imu)(struct amdgpu_device *adev);
void (*program_rlc_ram)(struct amdgpu_device *adev);
+ int (*wait_for_reset_status)(struct amdgpu_device *adev);
};
struct imu_rlc_ram_golden {
@@ -46,6 +52,7 @@ struct imu_rlc_ram_golden {
struct amdgpu_imu {
const struct amdgpu_imu_funcs *funcs;
+ enum imu_work_mode mode;
};
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 67f66f2f1809..c2fd6f3076a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_reset.h"
static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
@@ -64,7 +65,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
ti.process_name, ti.tgid, ti.task_name, ti.pid);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
- r = amdgpu_device_gpu_recover_imp(ring->adev, job);
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
if (r)
DRM_ERROR("GPU Recovery Failed: %d\n", r);
} else {
@@ -125,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
- struct dma_fence *hw_fence;
unsigned i;
- if (job->hw_fence.ops == NULL)
- hw_fence = job->external_hw_fence;
- else
- hw_fence = &job->hw_fence;
-
/* use sched fence if available */
- f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence;
+ f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence;
for (i = 0; i < job->num_ibs; ++i)
amdgpu_ib_free(ring->adev, &job->ibs[i], f);
}
@@ -148,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
- /* only put the hw fence if has embedded fence */
- if (job->hw_fence.ops != NULL)
- dma_fence_put(&job->hw_fence);
- else
- kfree(job);
+ dma_fence_put(&job->hw_fence);
}
void amdgpu_job_free(struct amdgpu_job *job)
@@ -161,11 +159,10 @@ void amdgpu_job_free(struct amdgpu_job *job)
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
- /* only put the hw fence if has embedded fence */
- if (job->hw_fence.ops != NULL)
- dma_fence_put(&job->hw_fence);
- else
+ if (!job->hw_fence.ops)
kfree(job);
+ else
+ dma_fence_put(&job->hw_fence);
}
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
@@ -195,15 +192,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
int r;
job->base.sched = &ring->sched;
- r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
- /* record external_hw_fence for direct submit */
- job->external_hw_fence = dma_fence_get(*fence);
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
+
if (r)
return r;
amdgpu_job_free(job);
- dma_fence_put(*fence);
-
return 0;
}
@@ -262,10 +256,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
}
- if (!job->job_run_counter)
- dma_fence_get(fence);
- else if (finished->error < 0)
- dma_fence_put(&job->hw_fence);
job->job_run_counter++;
amdgpu_job_free_resources(job);
@@ -285,10 +275,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
/* Signal all jobs not yet scheduled */
for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
struct drm_sched_rq *rq = &sched->sched_rq[i];
-
- if (!rq)
- continue;
-
spin_lock(&rq->lock);
list_for_each_entry(s_entity, &rq->entities, list) {
while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index d599c0540b46..babc0af751c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -50,7 +50,6 @@ struct amdgpu_job {
struct amdgpu_sync sync;
struct amdgpu_sync sched_sync;
struct dma_fence hw_fence;
- struct dma_fence *external_hw_fence;
uint32_t preamble_status;
uint32_t preemption_status;
bool vm_needs_flush;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 6de63ea6687e..1369c25448dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -43,17 +43,6 @@
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
-static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev)
-{
- /*
- * Add below quirk on several sienna_cichlid cards to disable
- * runtime pm to fix EMI failures.
- */
- if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) ||
- ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)))
- adev->runpm = false;
-}
-
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
@@ -158,37 +147,36 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
goto out;
}
+ adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
if (amdgpu_device_supports_px(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */
- adev->runpm = true;
+ (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
dev_info(adev->dev, "Using ATPX for runtime pm\n");
} else if (amdgpu_device_supports_boco(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
- adev->runpm = true;
+ (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
dev_info(adev->dev, "Using BOCO for runtime pm\n");
} else if (amdgpu_device_supports_baco(dev) &&
(amdgpu_runtime_pm != 0)) {
switch (adev->asic_type) {
case CHIP_VEGA20:
case CHIP_ARCTURUS:
- /* enable runpm if runpm=1 */
+ /* enable BACO as runpm mode if runpm=1 */
if (amdgpu_runtime_pm > 0)
- adev->runpm = true;
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
case CHIP_VEGA10:
- /* turn runpm on if noretry=0 */
+ /* enable BACO as runpm mode if noretry=0 */
if (!adev->gmc.noretry)
- adev->runpm = true;
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
default:
- /* enable runpm on CI+ */
- adev->runpm = true;
+ /* enable BACO as runpm mode on CI+ */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
}
- amdgpu_runtime_pm_quirk(adev);
-
- if (adev->runpm)
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)
dev_info(adev->dev, "Using BACO for runtime pm\n");
}
@@ -473,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
result->hw_ip_version_major = adev->ip_blocks[i].version->major;
result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ switch (type) {
+ case AMD_IP_BLOCK_TYPE_GFX:
+ result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
+ break;
+ case AMD_IP_BLOCK_TYPE_SDMA:
+ result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
+ break;
+ case AMD_IP_BLOCK_TYPE_UVD:
+ case AMD_IP_BLOCK_TYPE_VCN:
+ case AMD_IP_BLOCK_TYPE_JPEG:
+ result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
+ break;
+ case AMD_IP_BLOCK_TYPE_VCE:
+ result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
+ break;
+ default:
+ result->ip_discovery_version = 0;
+ break;
+ }
+ } else {
+ result->ip_discovery_version = 0;
+ }
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
result->ib_start_alignment = ib_start_alignment;
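A short sketch, assumption only: the new ip_discovery_version field mirrors adev->ip_versions[...][0], which is commonly packed as IP_VERSION(major, minor, rev); unpacking it would look like this. Both the example_ helper and the bit layout are assumptions for illustration, not part of the patch.

static void example_log_ip_discovery_version(uint32_t v)
{
	/* assumed packing: (major << 16) | (minor << 8) | rev */
	DRM_INFO("GC IP v%u.%u.%u\n", v >> 16, (v >> 8) & 0xff, v & 0xff);
}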
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 69a70a0aaed9..fe82b8b19a4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -114,8 +114,14 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
size_t doorbell_start_offset;
size_t doorbell_aperture_size;
size_t doorbell_process_limit;
+ size_t aggregated_doorbell_start;
+ int i;
- doorbell_start_offset = (adev->doorbell_index.max_assignment+1) * sizeof(u32);
+ aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32);
+ aggregated_doorbell_start =
+ roundup(aggregated_doorbell_start, PAGE_SIZE);
+
+ doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE;
doorbell_start_offset =
roundup(doorbell_start_offset,
amdgpu_mes_doorbell_process_slice(adev));
@@ -135,6 +141,11 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32);
adev->mes.max_doorbell_slices = doorbell_process_limit;
+ /* allocate Qword range for aggregated doorbell */
+ for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
+ adev->mes.aggregated_doorbells[i] =
+ aggregated_doorbell_start / sizeof(u32) + i * 2;
+
DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit);
return 0;
}
@@ -150,6 +161,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
+ spin_lock_init(&adev->mes.ring_lock);
mutex_init(&adev->mes.mutex_hidden);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
@@ -173,9 +185,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.sdma_hqd_mask[i] = 0xfc;
}
- for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
- adev->mes.agreegated_doorbells[i] = 0xffffffff;
-
r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
if (r) {
dev_err(adev->dev,
@@ -189,15 +198,29 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs);
if (r) {
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
dev_err(adev->dev,
"(%d) query_status_fence_offs wb alloc failed\n", r);
- return r;
+ goto error_ids;
}
adev->mes.query_status_fence_gpu_addr =
adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4);
adev->mes.query_status_fence_ptr =
(uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs];
+ r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs);
+ if (r) {
+ amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+ dev_err(adev->dev,
+ "(%d) read_val_offs alloc failed\n", r);
+ goto error_ids;
+ }
+ adev->mes.read_val_gpu_addr =
+ adev->wb.gpu_addr + (adev->mes.read_val_offs * 4);
+ adev->mes.read_val_ptr =
+ (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs];
+
r = amdgpu_mes_doorbell_init(adev);
if (r)
goto error;
@@ -206,6 +229,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
error:
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+ amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
error_ids:
idr_destroy(&adev->mes.pasid_idr);
idr_destroy(&adev->mes.gang_id_idr);
@@ -218,6 +243,8 @@ error_ids:
void amdgpu_mes_fini(struct amdgpu_device *adev)
{
amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
+ amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
+ amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
idr_destroy(&adev->mes.pasid_idr);
idr_destroy(&adev->mes.gang_id_idr);
@@ -675,8 +702,10 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
queue_input.doorbell_offset = qprops->doorbell_off;
queue_input.mqd_addr = queue->mqd_gpu_addr;
queue_input.wptr_addr = qprops->wptr_gpu_addr;
+ queue_input.wptr_mc_addr = qprops->wptr_mc_addr;
queue_input.queue_type = qprops->queue_type;
queue_input.paging = qprops->paging;
+ queue_input.is_kfd_process = 0;
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
if (r) {
@@ -696,6 +725,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
queue->queue_type = qprops->queue_type;
queue->paging = qprops->paging;
queue->gang = gang;
+ queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
list_add_tail(&queue->list, &gang->queue_list);
amdgpu_mes_unlock(&adev->mes);
@@ -774,8 +804,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct mes_unmap_legacy_queue_input queue_input;
int r;
- amdgpu_mes_lock(&adev->mes);
-
queue_input.action = action;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
@@ -788,7 +816,106 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
if (r)
DRM_ERROR("failed to unmap legacy queue\n");
- amdgpu_mes_unlock(&adev->mes);
+ return r;
+}
+
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ struct mes_misc_op_input op_input;
+ int r, val = 0;
+
+ op_input.op = MES_MISC_OP_READ_REG;
+ op_input.read_reg.reg_offset = reg;
+ op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes rreg is not supported!\n");
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to read reg (0x%x)\n", reg);
+ else
+ val = *(adev->mes.read_val_ptr);
+
+error:
+ return val;
+}
+
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRITE_REG;
+ op_input.write_reg.reg_offset = reg;
+ op_input.write_reg.reg_value = val;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes wreg is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to write reg (0x%x)\n", reg);
+
+error:
+ return r;
+}
+
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
+ op_input.wrm_reg.reg0 = reg0;
+ op_input.wrm_reg.reg1 = reg1;
+ op_input.wrm_reg.ref = ref;
+ op_input.wrm_reg.mask = mask;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes reg_write_reg_wait is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to reg_write_reg_wait\n");
+
+error:
+ return r;
+}
+
+int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ struct mes_misc_op_input op_input;
+ int r;
+
+ op_input.op = MES_MISC_OP_WRM_REG_WAIT;
+ op_input.wrm_reg.reg0 = reg;
+ op_input.wrm_reg.ref = val;
+ op_input.wrm_reg.mask = mask;
+
+ if (!adev->mes.funcs->misc_op) {
+ DRM_ERROR("mes reg wait is not supported!\n");
+ r = -EINVAL;
+ goto error;
+ }
+
+ r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+ if (r)
+ DRM_ERROR("failed to reg_write_reg_wait\n");
+
+error:
return r;
}
@@ -801,6 +928,8 @@ amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
props->hqd_base_gpu_addr = ring->gpu_addr;
props->rptr_gpu_addr = ring->rptr_gpu_addr;
props->wptr_gpu_addr = ring->wptr_gpu_addr;
+ props->wptr_mc_addr =
+ ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs;
props->queue_size = ring->ring_size;
props->eop_gpu_addr = ring->eop_gpu_addr;
props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL;
@@ -953,6 +1082,12 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
kfree(ring);
}
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio)
+{
+ return adev->mes.aggregated_doorbells[prio];
+}
+
int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
struct amdgpu_mes_ctx_data *ctx_data)
{
@@ -961,7 +1096,8 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
r = amdgpu_bo_create_kernel(adev,
sizeof(struct amdgpu_mes_ctx_meta_data),
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &ctx_data->meta_data_obj, NULL,
+ &ctx_data->meta_data_obj,
+ &ctx_data->meta_data_mc_addr,
&ctx_data->meta_data_ptr);
if (!ctx_data->meta_data_obj)
return -ENOMEM;
@@ -975,7 +1111,9 @@ int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
{
if (ctx_data->meta_data_obj)
- amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, NULL, NULL);
+ amdgpu_bo_free_kernel(&ctx_data->meta_data_obj,
+ &ctx_data->meta_data_mc_addr,
+ &ctx_data->meta_data_ptr);
}
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
@@ -1051,6 +1189,63 @@ error:
return r;
}
+int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_mes_ctx_data *ctx_data)
+{
+ struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
+ struct amdgpu_bo *bo = ctx_data->meta_data_obj;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo_list_entry vm_pd;
+ struct list_head list, duplicates;
+ struct dma_fence *fence = NULL;
+ struct ttm_validate_buffer tv;
+ struct ww_acquire_ctx ticket;
+ long r = 0;
+
+ INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&duplicates);
+
+ tv.bo = &bo->tbo;
+ tv.num_shared = 2;
+ list_add(&tv.head, &list);
+
+ amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+
+ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+ if (r) {
+ dev_err(adev->dev, "leaking bo va because "
+ "we fail to reserve bo (%ld)\n", r);
+ return r;
+ }
+
+ amdgpu_vm_bo_del(adev, bo_va);
+ if (!amdgpu_vm_ready(vm))
+ goto out_unlock;
+
+ r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
+ if (r)
+ goto out_unlock;
+ if (fence) {
+ amdgpu_bo_fence(bo, fence, true);
+ fence = NULL;
+ }
+
+ r = amdgpu_vm_clear_freed(adev, vm, &fence);
+ if (r || !fence)
+ goto out_unlock;
+
+ dma_fence_wait(fence, false);
+ amdgpu_bo_fence(bo, fence, true);
+ dma_fence_put(fence);
+
+out_unlock:
+ if (unlikely(r < 0))
+ dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
+ ttm_eu_backoff_reservation(&ticket, &list);
+
+ return r;
+}
+
static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
int pasid, int *gang_id,
int queue_type, int num_queue,
@@ -1157,7 +1352,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
if (r) {
DRM_ERROR("failed to alloc ctx meta data\n");
- goto error_pasid;
+ goto error_fini;
}
ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
@@ -1212,9 +1407,9 @@ error_queues:
amdgpu_mes_destroy_process(adev, pasid);
error_vm:
- BUG_ON(amdgpu_bo_reserve(ctx_data.meta_data_obj, true));
- amdgpu_vm_bo_del(adev, ctx_data.meta_data_va);
- amdgpu_bo_unreserve(ctx_data.meta_data_obj);
+ amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
+
+error_fini:
amdgpu_vm_fini(adev, vm);
error_pasid:
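A minimal sketch, not part of the patch: once adev->mes.ring.sched.ready is set, privileged register access can be routed through the new MES misc-op helpers rather than the KIQ ring. The example_ wrapper is hypothetical.

static void example_mes_reg_access(struct amdgpu_device *adev, uint32_t reg)
{
	uint32_t val;

	if (!adev->mes.ring.sched.ready)
		return;

	val = amdgpu_mes_rreg(adev, reg);          /* MES_MISC_OP_READ_REG */
	amdgpu_mes_wreg(adev, reg, val);           /* MES_MISC_OP_WRITE_REG */
	amdgpu_mes_reg_wait(adev, reg, val, ~0u);  /* MES_MISC_OP_WRM_REG_WAIT */
}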
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 25590b301f25..7b46f6bf4187 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -33,6 +33,13 @@
#define AMDGPU_MES_MAX_GFX_PIPES 2
#define AMDGPU_MES_MAX_SDMA_PIPES 2
+#define AMDGPU_MES_API_VERSION_SHIFT 12
+#define AMDGPU_MES_FEAT_VERSION_SHIFT 24
+
+#define AMDGPU_MES_VERSION_MASK 0x00000fff
+#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
+#define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000
+
enum amdgpu_mes_priority_level {
AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
@@ -65,6 +72,9 @@ struct amdgpu_mes {
spinlock_t queue_id_lock;
+ uint32_t sched_version;
+ uint32_t kiq_version;
+
uint32_t total_max_queue;
uint32_t doorbell_id_offset;
uint32_t max_doorbell_slices;
@@ -73,6 +83,7 @@ struct amdgpu_mes {
uint64_t default_gang_quantum;
struct amdgpu_ring ring;
+ spinlock_t ring_lock;
const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
@@ -102,13 +113,17 @@ struct amdgpu_mes {
uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
- uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
+ uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
uint32_t sch_ctx_offs;
uint64_t sch_ctx_gpu_addr;
uint64_t *sch_ctx_ptr;
uint32_t query_status_fence_offs;
uint64_t query_status_fence_gpu_addr;
uint64_t *query_status_fence_ptr;
+ uint32_t read_val_offs;
+ uint64_t read_val_gpu_addr;
+ uint32_t *read_val_ptr;
+
uint32_t saved_flags;
/* initialize kiq pipe */
@@ -166,6 +181,7 @@ struct amdgpu_mes_queue_properties {
uint64_t hqd_base_gpu_addr;
uint64_t rptr_gpu_addr;
uint64_t wptr_gpu_addr;
+ uint64_t wptr_mc_addr;
uint32_t queue_size;
uint64_t eop_gpu_addr;
uint32_t hqd_pipe_priority;
@@ -198,12 +214,14 @@ struct mes_add_queue_input {
uint32_t doorbell_offset;
uint64_t mqd_addr;
uint64_t wptr_addr;
+ uint64_t wptr_mc_addr;
uint32_t queue_type;
uint32_t paging;
uint32_t gws_base;
uint32_t gws_size;
uint64_t tba_addr;
uint64_t tma_addr;
+ uint32_t is_kfd_process;
};
struct mes_remove_queue_input {
@@ -233,6 +251,36 @@ struct mes_resume_gang_input {
uint64_t gang_context_addr;
};
+enum mes_misc_opcode {
+ MES_MISC_OP_WRITE_REG,
+ MES_MISC_OP_READ_REG,
+ MES_MISC_OP_WRM_REG_WAIT,
+ MES_MISC_OP_WRM_REG_WR_WAIT,
+};
+
+struct mes_misc_op_input {
+ enum mes_misc_opcode op;
+
+ union {
+ struct {
+ uint32_t reg_offset;
+ uint64_t buffer_addr;
+ } read_reg;
+
+ struct {
+ uint32_t reg_offset;
+ uint32_t reg_value;
+ } write_reg;
+
+ struct {
+ uint32_t ref;
+ uint32_t mask;
+ uint32_t reg0;
+ uint32_t reg1;
+ } wrm_reg;
+ };
+};
+
struct amdgpu_mes_funcs {
int (*add_hw_queue)(struct amdgpu_mes *mes,
struct mes_add_queue_input *input);
@@ -248,6 +296,9 @@ struct amdgpu_mes_funcs {
int (*resume_gang)(struct amdgpu_mes *mes,
struct mes_resume_gang_input *input);
+
+ int (*misc_op)(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input);
};
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
@@ -280,6 +331,15 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq);
+uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
+int amdgpu_mes_wreg(struct amdgpu_device *adev,
+ uint32_t reg, uint32_t val);
+int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
+ uint32_t val, uint32_t mask);
+int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask);
+
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
int queue_type, int idx,
struct amdgpu_mes_ctx_data *ctx_data,
@@ -287,12 +347,17 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
+uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
+ enum amdgpu_mes_priority_level prio);
+
int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
struct amdgpu_mes_ctx_data *ctx_data);
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_mes_ctx_data *ctx_data);
+int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
+ struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_self_test(struct amdgpu_device *adev);
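A short sketch, assumption only: splitting the firmware-reported sched_version into its fields with the masks added above; the example_ name is hypothetical.

static void example_log_mes_version(struct amdgpu_mes *mes)
{
	uint32_t feat = (mes->sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
			AMDGPU_MES_FEAT_VERSION_SHIFT;
	uint32_t api  = (mes->sched_version & AMDGPU_MES_API_VERSION_MASK) >>
			AMDGPU_MES_API_VERSION_SHIFT;
	uint32_t base =  mes->sched_version & AMDGPU_MES_VERSION_MASK;

	DRM_INFO("MES fw: feature %u, api %u, ucode %u\n", feat, api, base);
}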
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
index c000f656aae5..912a5be2ece6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes_ctx.h
@@ -107,6 +107,7 @@ struct amdgpu_mes_ctx_meta_data {
struct amdgpu_mes_ctx_data {
struct amdgpu_bo *meta_data_obj;
uint64_t meta_data_gpu_addr;
+ uint64_t meta_data_mc_addr;
struct amdgpu_bo_va *meta_data_va;
void *meta_data_ptr;
uint32_t gang_ids[AMDGPU_HW_IP_DMA+1];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index f80b4838cea1..d788a00043a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -37,6 +37,7 @@
#include <drm/drm_fixed.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
+#include <drm/drm_framebuffer.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_probe_helper.h>
#include <linux/i2c.h>
@@ -349,15 +350,11 @@ struct amdgpu_mode_info {
#define AMDGPU_MAX_BL_LEVEL 0xFF
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
struct amdgpu_backlight_privdata {
struct amdgpu_encoder *encoder;
uint8_t negative;
};
-#endif
-
struct amdgpu_atom_ss {
uint16_t percentage;
uint16_t percentage_divider;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2c82b1d5a0d7..4570ad449390 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -882,6 +882,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (WARN_ON_ONCE(min_offset > max_offset))
return -EINVAL;
+ /* Check domain to be pinned to against preferred domains */
+ if (bo->preferred_domains & domain)
+ domain = bo->preferred_domains & domain;
+
/* A shared bo cannot be migrated to VRAM */
if (bo->tbo.base.import_attach) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index e9411c28d88b..9f7a5e393f85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -37,6 +37,7 @@
#include "psp_v11_0_8.h"
#include "psp_v12_0.h"
#include "psp_v13_0.h"
+#include "psp_v13_0_4.h"
#include "amdgpu_ras.h"
#include "amdgpu_securedisplay.h"
@@ -151,6 +152,10 @@ static int psp_early_init(void *handle)
psp_v13_0_set_psp_funcs(psp);
psp->autoload_supported = true;
break;
+ case IP_VERSION(13, 0, 4):
+ psp_v13_0_4_set_psp_funcs(psp);
+ psp->autoload_supported = true;
+ break;
default:
return -EINVAL;
}
@@ -1292,6 +1297,8 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
break;
}
+
+ amdgpu_put_xgmi_hive(hive);
}
int psp_xgmi_get_topology_info(struct psp_context *psp,
@@ -2168,6 +2175,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_RLC_DRAM:
*type = GFX_FW_TYPE_RLC_DRAM_BOOT;
break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE0_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE1_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE2_TAP_DELAYS;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ *type = GFX_FW_TYPE_SE3_TAP_DELAYS;
+ break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
@@ -2348,6 +2370,13 @@ static int psp_load_smu_fw(struct psp_context *psp)
&adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
struct amdgpu_ras *ras = psp->ras_context.ras;
+ /*
+ * Skip SMU FW reloading when BACO is used for runpm,
+ * as the SMU is always alive in that case.
+ */
+ if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
+ return 0;
+
if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
return 0;
@@ -2372,7 +2401,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
static bool fw_load_skip_check(struct psp_context *psp,
struct amdgpu_firmware_info *ucode)
{
- if (!ucode->fw)
+ if (!ucode->fw || !ucode->ucode_size)
return true;
if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC &&
@@ -2612,6 +2641,9 @@ static int psp_hw_fini(void *handle)
psp_rap_terminate(psp);
psp_dtm_terminate(psp);
psp_hdcp_terminate(psp);
+
+ if (adev->gmc.xgmi.num_physical_nodes > 1)
+ psp_xgmi_terminate(psp);
}
psp_asd_terminate(psp);
@@ -3670,3 +3702,11 @@ const struct amdgpu_ip_block_version psp_v13_0_ip_block = {
.rev = 0,
.funcs = &psp_ip_funcs,
};
+
+const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = {
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 13,
+ .minor = 0,
+ .rev = 4,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index e431f4994931..c32b74bd970f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -69,8 +69,8 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_SOSDRV = 0x20000,
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
PSP_BL__LOAD_SOCDRV = 0xB0000,
- PSP_BL__LOAD_INTFDRV = 0xC0000,
- PSP_BL__LOAD_DBGDRV = 0xD0000,
+ PSP_BL__LOAD_DBGDRV = 0xC0000,
+ PSP_BL__LOAD_INTFDRV = 0xD0000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
@@ -439,6 +439,7 @@ extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block;
extern const struct amdgpu_ip_block_version psp_v12_0_ip_block;
extern const struct amdgpu_ip_block_version psp_v13_0_ip_block;
+extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block;
extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
uint32_t field_val, uint32_t mask, bool check_changed);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index dac202ae864d..ff5361f5c2d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,6 +35,8 @@
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "atom.h"
+#include "amdgpu_reset.h"
+
#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -715,27 +717,30 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!con)
return -EINVAL;
- info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
- if (!info)
- return -ENOMEM;
+ if (head->block == AMDGPU_RAS_BLOCK__GFX) {
+ info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
- if (!enable) {
- info->disable_features = (struct ta_ras_disable_features_input) {
- .block_id = amdgpu_ras_block_to_ta(head->block),
- .error_type = amdgpu_ras_error_to_ta(head->type),
- };
- } else {
- info->enable_features = (struct ta_ras_enable_features_input) {
- .block_id = amdgpu_ras_block_to_ta(head->block),
- .error_type = amdgpu_ras_error_to_ta(head->type),
- };
+ if (!enable) {
+ info->disable_features = (struct ta_ras_disable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ } else {
+ info->enable_features = (struct ta_ras_enable_features_input) {
+ .block_id = amdgpu_ras_block_to_ta(head->block),
+ .error_type = amdgpu_ras_error_to_ta(head->type),
+ };
+ }
}
/* Do not enable if it is not allowed. */
WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
/* Only enable ras feature operation handle on host side */
- if (!amdgpu_sriov_vf(adev) &&
+ if (head->block == AMDGPU_RAS_BLOCK__GFX &&
+ !amdgpu_sriov_vf(adev) &&
!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
@@ -751,7 +756,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
__amdgpu_ras_feature_enable(adev, head, enable);
ret = 0;
out:
- kfree(info);
+ if (head->block == AMDGPU_RAS_BLOCK__GFX)
+ kfree(info);
return ret;
}
@@ -1936,8 +1942,16 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
amdgpu_put_xgmi_hive(hive);
}
- if (amdgpu_device_should_recover_gpu(ras->adev))
- amdgpu_device_gpu_recover(ras->adev, NULL);
+ if (amdgpu_device_should_recover_gpu(ras->adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
+ }
atomic_set(&ras->in_recovery, 0);
}
@@ -2148,7 +2162,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
bool exc_err_limit = false;
int ret;
- if (!con)
+ if (!con || amdgpu_sriov_vf(adev))
return 0;
/* Allow access to RAS EEPROM via debugfs, when the ASIC
@@ -2946,7 +2960,7 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
- schedule_work(&ras->recovery_work);
+ amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index b9a6fac2b8b2..bf5a95104ec1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -328,10 +328,16 @@ struct ecc_info_per_ch {
uint16_t ce_count_hi_chip;
uint64_t mca_umc_status;
uint64_t mca_umc_addr;
+ uint64_t mca_ceumc_addr;
};
struct umc_ecc_info {
struct ecc_info_per_ch ecc[MAX_UMC_CHANNEL_NUM];
+
+ /* Whether the SMU ecctable supports recording the
+ * correctable error address
+ */
+ int record_ce_addr_supported;
};
struct amdgpu_ras {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index c80af0889773..32c86a0b145c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -132,6 +132,7 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
}
atomic_set(&reset_domain->in_gpu_reset, 0);
+ atomic_set(&reset_domain->reset_res, 0);
init_rwsem(&reset_domain->sem);
return reset_domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 1949dbe28a86..ffda1560c648 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -37,6 +37,7 @@ struct amdgpu_reset_context {
struct amdgpu_device *reset_req_dev;
struct amdgpu_job *job;
struct amdgpu_hive_info *hive;
+ struct list_head *reset_device_list;
unsigned long flags;
};
@@ -82,6 +83,7 @@ struct amdgpu_reset_domain {
enum amdgpu_reset_domain_type type;
struct rw_semaphore sem;
atomic_t in_gpu_reset;
+ atomic_t reset_res;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13db99d653bd..d3558c34d406 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -543,12 +543,12 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring,
*/
prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ;
- if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
- if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
- prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
- prop->hqd_queue_priority =
- AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
- }
+ if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE &&
+ amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) ||
+ (ring->funcs->type == AMDGPU_RING_TYPE_GFX &&
+ amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) {
+ prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7d89a52091c0..82c178a9033a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -143,6 +143,7 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
/*
* Rings.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
index f6fd9e1a7dac..03ac36b2c2cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
@@ -222,6 +222,11 @@ struct amdgpu_rlc {
u32 rlc_dram_ucode_size_bytes;
u32 rlcp_ucode_size_bytes;
u32 rlcv_ucode_size_bytes;
+ u32 global_tap_delays_ucode_size_bytes;
+ u32 se0_tap_delays_ucode_size_bytes;
+ u32 se1_tap_delays_ucode_size_bytes;
+ u32 se2_tap_delays_ucode_size_bytes;
+ u32 se3_tap_delays_ucode_size_bytes;
u32 *register_list_format;
u32 *register_restore;
@@ -232,6 +237,11 @@ struct amdgpu_rlc {
u8 *rlc_dram_ucode;
u8 *rlcp_ucode;
u8 *rlcv_ucode;
+ u8 *global_tap_delays_ucode;
+ u8 *se0_tap_delays_ucode;
+ u8 *se1_tap_delays_ucode;
+ u8 *se2_tap_delays_ucode;
+ u8 *se3_tap_delays_ucode;
bool is_rlc_v2_1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3b4c19412625..134575a3893c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -637,6 +637,8 @@ struct amdgpu_ttm_tt {
#endif
};
+#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm)
+
#ifdef CONFIG_DRM_AMDGPU_USERPTR
/*
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
@@ -648,7 +650,7 @@ struct amdgpu_ttm_tt {
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
struct ttm_tt *ttm = bo->tbo.ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct mm_struct *mm;
@@ -702,7 +704,7 @@ out_unlock:
*/
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
bool r = false;
if (!gtt || !gtt->userptr)
@@ -751,7 +753,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -788,7 +790,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
enum dma_data_direction direction = write ?
DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
@@ -822,7 +824,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
struct ttm_tt *ttm = tbo->ttm;
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (amdgpu_bo_encrypted(abo))
flags |= AMDGPU_PTE_TMZ;
@@ -860,7 +862,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
struct ttm_resource *bo_mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void*)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
uint64_t flags;
int r;
@@ -927,7 +929,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct ttm_operation_ctx ctx = { false, false };
- struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
struct ttm_placement placement;
struct ttm_place placements;
struct ttm_resource *tmp;
@@ -998,7 +1000,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
/* if the pages have userptr pinning then clear that first */
if (gtt->userptr) {
@@ -1025,7 +1027,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt->usertask)
put_task_struct(gtt->usertask);
@@ -1079,7 +1081,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
struct ttm_operation_ctx *ctx)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
pgoff_t i;
int ret;
@@ -1113,7 +1115,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
struct amdgpu_device *adev;
pgoff_t i;
@@ -1182,7 +1184,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
- gtt = (void *)bo->ttm;
+ gtt = ttm_to_amdgpu_ttm_tt(bo->ttm);
gtt->userptr = addr;
gtt->userflags = flags;
@@ -1199,7 +1201,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
*/
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return NULL;
@@ -1218,7 +1220,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end, unsigned long *userptr)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1241,7 +1243,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
*/
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL || !gtt->userptr)
return false;
@@ -1254,7 +1256,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
*/
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
if (gtt == NULL)
return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index ffa4c0d207db..939c8614f0e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -486,26 +486,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS12:
case CHIP_VEGAM:
return AMDGPU_FW_LOAD_SMU;
- case CHIP_VEGA10:
- case CHIP_RAVEN:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI14:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- case CHIP_VANGOGH:
- case CHIP_DIMGREY_CAVEFISH:
- case CHIP_ALDEBARAN:
- case CHIP_BEIGE_GOBY:
- case CHIP_YELLOW_CARP:
- if (!load_type)
- return AMDGPU_FW_LOAD_DIRECT;
- else
- return AMDGPU_FW_LOAD_PSP;
case CHIP_CYAN_SKILLFISH:
if (!(load_type &&
adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2))
@@ -581,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "RLC_P";
case AMDGPU_UCODE_ID_RLC_V:
return "RLC_V";
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ return "GLOBAL_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ return "SE0_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ return "SE1_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ return "SE2_TAP_DELAYS";
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ return "SE3_TAP_DELAYS";
case AMDGPU_UCODE_ID_IMU_I:
return "IMU_I";
case AMDGPU_UCODE_ID_IMU_D:
@@ -765,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlcv_ucode;
break;
+ case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.global_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode;
+ break;
+ case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
+ ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes;
+ ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode;
+ break;
case AMDGPU_UCODE_ID_CP_MES:
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index f510b6aa82ab..ebed3f5226db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 {
uint32_t rlcv_ucode_offset_bytes;
};
+/* version_major=2, version_minor=4 */
+struct rlc_firmware_header_v2_4 {
+ struct rlc_firmware_header_v2_3 v2_3;
+ uint32_t global_tap_delays_ucode_size_bytes;
+ uint32_t global_tap_delays_ucode_offset_bytes;
+ uint32_t se0_tap_delays_ucode_size_bytes;
+ uint32_t se0_tap_delays_ucode_offset_bytes;
+ uint32_t se1_tap_delays_ucode_size_bytes;
+ uint32_t se1_tap_delays_ucode_offset_bytes;
+ uint32_t se2_tap_delays_ucode_size_bytes;
+ uint32_t se2_tap_delays_ucode_offset_bytes;
+ uint32_t se3_tap_delays_ucode_size_bytes;
+ uint32_t se3_tap_delays_ucode_offset_bytes;
+};
+
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MES1_DATA,
AMDGPU_UCODE_ID_IMU_I,
AMDGPU_UCODE_ID_IMU_D,
+ AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE0_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE1_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE2_TAP_DELAYS,
+ AMDGPU_UCODE_ID_SE3_TAP_DELAYS,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
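A hedged sketch of how a loader could pick up the new tap-delay blobs from an RLC v2.4 header; the field names come from the struct above, while the example_ function is hypothetical and error handling is omitted.

static void example_parse_rlc_v2_4(struct amdgpu_device *adev,
				   const struct firmware *fw)
{
	const struct rlc_firmware_header_v2_4 *hdr =
		(const struct rlc_firmware_header_v2_4 *)fw->data;

	adev->gfx.rlc.global_tap_delays_ucode_size_bytes =
		le32_to_cpu(hdr->global_tap_delays_ucode_size_bytes);
	adev->gfx.rlc.global_tap_delays_ucode = (u8 *)fw->data +
		le32_to_cpu(hdr->global_tap_delays_ucode_offset_bytes);
	/* se0..se3 blobs follow the same size/offset pattern */
}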
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 2ec6698aa1fe..3629d8f292ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -41,6 +41,12 @@
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
+#define LOOP_UMC_NODE_INST(node_inst) \
+ for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++)
+
+#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
+ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
+
struct amdgpu_umc_ras {
struct amdgpu_ras_block_object ras_block;
void (*err_cnt_init)(struct amdgpu_device *adev);
@@ -62,6 +68,10 @@ struct amdgpu_umc {
uint32_t channel_inst_num;
/* number of umc instance with memory map register access */
uint32_t umc_inst_num;
+
+ /* number of umc node instance with memory map register access */
+ uint32_t node_inst_num;
+
/* UMC register per channel offset */
uint32_t channel_offs;
/* channel index table of interleaved memory */
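A tiny sketch, assumption only: iterating every channel on every UMC node with the new helper macro; the example_ function is hypothetical.

static uint32_t example_count_umc_channels(struct amdgpu_device *adev)
{
	uint32_t node_inst, umc_inst, ch_inst, total = 0;

	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
		total++;

	return total;
}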
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index aa7acfabf360..f36e4f08db6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -54,6 +54,7 @@
#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
#define FIRMWARE_VCN4_0_0 "amdgpu/vcn_4_0_0.bin"
+#define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin"
#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
@@ -74,6 +75,7 @@ MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);
MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_0);
+MODULE_FIRMWARE(FIRMWARE_VCN4_0_2);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -185,6 +187,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
adev->vcn.indirect_sram = true;
break;
+ case IP_VERSION(4, 0, 2):
+ fw_name = FIRMWARE_VCN4_0_2;
+ if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+ adev->vcn.indirect_sram = false;
+ break;
case IP_VERSION(4, 0, 4):
fw_name = FIRMWARE_VCN4_0_4;
if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
@@ -329,6 +337,18 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
return 0;
}
+/* from vcn4 and above, only unified queue is used */
+static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool ret = false;
+
+ if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0))
+ ret = true;
+
+ return ret;
+}
+
bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
{
bool ret = false;
@@ -718,19 +738,55 @@ error:
return r;
}
+static uint32_t *amdgpu_vcn_unified_ring_ib_header(struct amdgpu_ib *ib,
+ uint32_t ib_pack_in_dw, bool enc)
+{
+ uint32_t *ib_checksum;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* single queue checksum */
+ ib->ptr[ib->length_dw++] = 0x30000002;
+ ib_checksum = &ib->ptr[ib->length_dw++];
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw;
+
+ ib->ptr[ib->length_dw++] = 0x00000010; /* engine info */
+ ib->ptr[ib->length_dw++] = 0x30000001;
+ ib->ptr[ib->length_dw++] = enc ? 0x2 : 0x3;
+ ib->ptr[ib->length_dw++] = ib_pack_in_dw * sizeof(uint32_t);
+
+ return ib_checksum;
+}
+
+static void amdgpu_vcn_unified_ring_ib_checksum(uint32_t **ib_checksum,
+ uint32_t ib_pack_in_dw)
+{
+ uint32_t i;
+ uint32_t checksum = 0;
+
+ for (i = 0; i < ib_pack_in_dw; i++)
+ checksum += *(*ib_checksum + 2 + i);
+
+ **ib_checksum = checksum;
+}
+
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
- const unsigned int ib_size_dw = 64;
+ unsigned int ib_size_dw = 64;
struct amdgpu_device *adev = ring->adev;
struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
+ bool sq = amdgpu_vcn_using_unified_queue(ring);
+ uint32_t *ib_checksum;
+ uint32_t ib_pack_in_dw;
int i, r;
+ if (sq)
+ ib_size_dw += 8;
+
r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
AMDGPU_IB_POOL_DIRECT, &job);
if (r)
@@ -739,6 +795,13 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
ib = &job->ibs[0];
ib->length_dw = 0;
+ /* single queue headers */
+ if (sq) {
+ ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t)
+ + 4 + 2; /* engine info + decoding ib in dw */
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false);
+ }
+
ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
@@ -752,6 +815,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (sq)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
@@ -838,13 +904,18 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
uint64_t addr;
+ bool sq = amdgpu_vcn_using_unified_queue(ring);
int i, r;
+ if (sq)
+ ib_size_dw += 8;
+
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
AMDGPU_IB_POOL_DIRECT, &job);
if (r)
@@ -854,6 +925,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
+
+ if (sq)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
@@ -873,6 +948,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (sq)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@@ -892,13 +970,18 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
struct amdgpu_ib *ib_msg,
struct dma_fence **fence)
{
- const unsigned ib_size_dw = 16;
+ unsigned int ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
+ uint32_t *ib_checksum = NULL;
uint64_t addr;
+ bool sq = amdgpu_vcn_using_unified_queue(ring);
int i, r;
+ if (sq)
+ ib_size_dw += 8;
+
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
AMDGPU_IB_POOL_DIRECT, &job);
if (r)
@@ -908,6 +991,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
ib->length_dw = 0;
+
+ if (sq)
+ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true);
+
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
@@ -927,6 +1014,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
+ if (sq)
+ amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11);
+
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
@@ -977,6 +1067,20 @@ error:
return r;
}
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ long r;
+
+ r = amdgpu_vcn_enc_ring_test_ib(ring, timeout);
+ if (r)
+ goto error;
+
+ r = amdgpu_vcn_dec_sw_ring_test_ib(ring, timeout);
+
+error:
+ return r;
+}
+
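A unified VCN ring would then typically point its IB test at this combined helper through its ring funcs; a sketch, assuming the IP-specific code fills in the remaining callbacks (the variable name below is hypothetical):

    static const struct amdgpu_ring_funcs vcn_unified_ring_funcs = {
            /* ... other callbacks elided ... */
            .test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
            .test_ib = amdgpu_vcn_unified_ring_test_ib,
    };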
enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring)
{
switch(ring) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 6f90fcee0f9c..60c608144480 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -364,6 +364,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a8ecf04389b3..9be57389301b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -76,6 +76,12 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
unsigned long flags;
uint32_t seq;
+ if (adev->mes.ring.sched.ready) {
+ amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
+ ref, mask);
+ return;
+ }
+
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dc76d2b3ce52..59cac347baa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -54,7 +54,7 @@
* (uncached system pages).
* Each VM has an ID associated with it and there is a page table
* associated with each VMID. When executing a command buffer,
- * the kernel tells the the ring what VMID to use for that command
+ * the kernel tells the ring what VMID to use for that command
* buffer. VMIDs are allocated dynamically as commands are submitted.
* The userspace drivers maintain their own address space and the kernel
* sets up their pages tables accordingly when they submit their
@@ -2168,6 +2168,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
} else {
vm->update_funcs = &amdgpu_vm_sdma_funcs;
}
+ /*
+ * Make sure the root PD gets mapped, as vm_update_mode could have
+ * changed when turning a GFX VM into a compute VM.
+ */
+ r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo));
+ if (r)
+ goto unreserve_bo;
+
dma_fence_put(vm->last_update);
vm->last_update = NULL;
vm->is_compute_context = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 49e4092f447f..28ec5f8ac1c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -50,6 +50,35 @@ to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}
+static inline struct drm_buddy_block *
+amdgpu_vram_mgr_first_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 start, size;
+
+ block = amdgpu_vram_mgr_first_block(head);
+ if (!block)
+ return false;
+
+ while (head != block->link.next) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+
+ block = list_entry(block->link.next, struct drm_buddy_block, link);
+ if (start + size != amdgpu_vram_mgr_block_start(block))
+ return false;
+ }
+
+ return true;
+}
+
+
+
/**
* DOC: mem_info_vram_total
*
@@ -366,11 +395,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
unsigned long pages_per_block;
int r;
- lpfn = place->lpfn << PAGE_SHIFT;
+ lpfn = (u64)place->lpfn << PAGE_SHIFT;
if (!lpfn)
lpfn = man->size;
- fpfn = place->fpfn << PAGE_SHIFT;
+ fpfn = (u64)place->fpfn << PAGE_SHIFT;
max_bytes = adev->gmc.mc_vram_size;
if (tbo->type != ttm_bo_type_kernel)
@@ -410,12 +439,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
- remaining_size = vres->base.num_pages << PAGE_SHIFT;
+ remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT;
mutex_lock(&mgr->lock);
while (remaining_size) {
if (tbo->page_alignment)
- min_block_size = tbo->page_alignment << PAGE_SHIFT;
+ min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
else
min_block_size = mgr->default_page_size;
@@ -424,12 +453,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
- if (size >= pages_per_block << PAGE_SHIFT)
- min_block_size = pages_per_block << PAGE_SHIFT;
+ if (size >= (u64)pages_per_block << PAGE_SHIFT)
+ min_block_size = (u64)pages_per_block << PAGE_SHIFT;
cur_size = size;
- if (fpfn + size != place->lpfn << PAGE_SHIFT) {
+ if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
/*
* Except for actual range allocation, modify the size and
* min_block_size conforming to continuous flag enablement
@@ -469,7 +498,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
LIST_HEAD(temp);
trim_list = &vres->blocks;
- original_size = vres->base.num_pages << PAGE_SHIFT;
+ original_size = (u64)vres->base.num_pages << PAGE_SHIFT;
/*
* If size value is rounded up to min_block_size, trim the last
@@ -496,16 +525,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
list_splice_tail(trim_list, &vres->blocks);
}
- list_for_each_entry(block, &vres->blocks, link)
- vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+ vres->base.start = 0;
+ list_for_each_entry(block, &vres->blocks, link) {
+ unsigned long start;
- block = amdgpu_vram_mgr_first_block(&vres->blocks);
- if (!block) {
- r = -EINVAL;
- goto error_fini;
- }
+ start = amdgpu_vram_mgr_block_start(block) +
+ amdgpu_vram_mgr_block_size(block);
+ start >>= PAGE_SHIFT;
+
+ if (start > vres->base.num_pages)
+ start -= vres->base.num_pages;
+ else
+ start = 0;
+ vres->base.start = max(vres->base.start, start);
- vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+ }
if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
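The (u64) casts in this hunk matter because place->fpfn/lpfn, num_pages and page_alignment are 32-bit quantities; shifting them by PAGE_SHIFT in 32-bit arithmetic silently wraps anything at or above 4 GiB. A self-contained illustration of the failure mode (userspace C, hypothetical values, PAGE_SHIFT assumed to be 12):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
            uint32_t lpfn = 0x200000;                        /* page number of an 8 GiB limit */
            uint64_t wrapped = lpfn << PAGE_SHIFT;           /* shift done in 32 bits: 0 */
            uint64_t correct = (uint64_t)lpfn << PAGE_SHIFT; /* 0x200000000 = 8 GiB */

            printf("wrapped=%#llx correct=%#llx\n",
                   (unsigned long long)wrapped, (unsigned long long)correct);
            return 0;
    }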
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 9a2db87186c7..0e04e42cf809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -50,34 +50,7 @@ static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
{
- return PAGE_SIZE << drm_buddy_block_order(block);
-}
-
-static inline struct drm_buddy_block *
-amdgpu_vram_mgr_first_block(struct list_head *list)
-{
- return list_first_entry_or_null(list, struct drm_buddy_block, link);
-}
-
-static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
-{
- struct drm_buddy_block *block;
- u64 start, size;
-
- block = amdgpu_vram_mgr_first_block(head);
- if (!block)
- return false;
-
- while (head != block->link.next) {
- start = amdgpu_vram_mgr_block_start(block);
- size = amdgpu_vram_mgr_block_size(block);
-
- block = list_entry(block->link.next, struct drm_buddy_block, link);
- if (start + size != amdgpu_vram_mgr_block_start(block))
- return false;
- }
-
- return true;
+ return (u64)PAGE_SIZE << drm_buddy_block_order(block);
}
static inline struct amdgpu_vram_mgr_resource *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 1b108d03e785..f2aebbf3fbe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -742,7 +742,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
amdgpu_put_xgmi_hive(hive);
}
- return psp_xgmi_terminate(&adev->psp);
+ return 0;
}
static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
index 33a8a7365aef..f0e235f98afb 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c
@@ -28,13 +28,44 @@
#include "navi10_enum.h"
#include "soc15_common.h"
+#define regATHUB_MISC_CNTL_V3_0_1 0x00d7
+#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0
+
+
+static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1);
+ break;
+ default:
+ data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ break;
+ }
+ return data;
+}
+
+static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data)
+{
+ switch (adev->ip_versions[ATHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data);
+ break;
+ default:
+ WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ break;
+ }
+}
+
static void
athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
uint32_t def, data;
- def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ def = data = athub_v3_0_get_cg_cntl(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG))
data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
if (def != data)
- WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ athub_v3_0_set_cg_cntl(adev, data);
}
static void
@@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
{
uint32_t def, data;
- def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ def = data = athub_v3_0_get_cg_cntl(adev);
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS))
data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
@@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
if (def != data)
- WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data);
+ athub_v3_0_set_cg_cntl(adev, data);
}
int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
@@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->ip_versions[ATHUB_HWIP][0]) {
case IP_VERSION(3, 0, 0):
+ case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 0, 2):
athub_v3_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
@@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
int data;
/* AMD_CG_SUPPORT_ATHUB_MGCG */
- data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL);
+ data = athub_v3_0_get_cg_cntl(adev);
if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_ATHUB_MGCG;
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index d4f5a584075d..fa7421afb9a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -118,8 +118,6 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
}
}
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
static u8 amdgpu_atombios_encoder_backlight_level(struct backlight_device *bd)
{
u8 level;
@@ -251,18 +249,6 @@ amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *amdgpu_encoder)
}
}
-#else /* !CONFIG_BACKLIGHT_CLASS_DEVICE */
-
-void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *encoder)
-{
-}
-
-void amdgpu_atombios_encoder_fini_backlight(struct amdgpu_encoder *encoder)
-{
-}
-
-#endif
-
bool amdgpu_atombios_encoder_is_digital(struct drm_encoder *encoder)
{
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
index f3852b59b1d6..a8b29d33c464 100644
--- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
+++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx11.h
@@ -39,7 +39,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
0x00000000, // DB_DEPTH_CLEAR
0x00000000, // PA_SC_SCREEN_SCISSOR_TL
0x40004000, // PA_SC_SCREEN_SCISSOR_BR
- 0x00000000, // DB_DFSM_CONTROL
+ 0, // HOLE
0x00000000, // DB_RESERVED_REG_2
0x00000000, // DB_Z_INFO
0x00000000, // DB_STENCIL_INFO
@@ -50,7 +50,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_1[] =
0x00000000, // DB_RESERVED_REG_1
0x00000000, // DB_RESERVED_REG_3
0x00000000, // DB_SPI_VRS_CENTER_LOCATION
- 0x00000000, // DB_VRS_OVERRIDE_CNTL
+ 0, // HOLE
0x00000000, // DB_Z_READ_BASE_HI
0x00000000, // DB_STENCIL_READ_BASE_HI
0x00000000, // DB_Z_WRITE_BASE_HI
@@ -270,29 +270,29 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
0x00000000, // PA_SC_FSR_EN
0x00000000, // PA_SC_FSR_FBW_RECURSIONS_X
0x00000000, // PA_SC_FSR_FBW_RECURSIONS_Y
- 0x00000000, // PA_SC_VRS_RATE_FEEDBACK_VIEW
+ 0, // HOLE
0x00000000, // PA_SC_VRS_OVERRIDE_CNTL
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_BASE_EXT
0x00000000, // PA_SC_VRS_RATE_FEEDBACK_SIZE_XY
0x00000000, // PA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
- 0, // HOLE
+ 0x00000000, // PA_SC_VRS_RATE_CACHE_CNTL
0, // HOLE
0, // HOLE
0x00000000, // PA_SC_VRS_RATE_BASE
0x00000000, // PA_SC_VRS_RATE_BASE_EXT
0x00000000, // PA_SC_VRS_RATE_SIZE_XY
- 0x00000000, // PA_SC_VRS_RATE_VIEW
- 0xffffffff, // VGT_MAX_VTX_INDX
- 0x00000000, // VGT_MIN_VTX_INDX
- 0x00000000, // VGT_INDX_OFFSET
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
0x00550055, // CB_RMI_GL2_CACHE_CONTROL
0x00000000, // CB_BLEND_RED
0x00000000, // CB_BLEND_GREEN
0x00000000, // CB_BLEND_BLUE
0x00000000, // CB_BLEND_ALPHA
- 0x00000000, // CB_DCC_CONTROL
+ 0x00000000, // CB_FDCC_CONTROL
0x00000000, // CB_COVERAGE_OUT_CONTROL
0x00000000, // DB_STENCIL_CONTROL
0x01000000, // DB_STENCILREFMASK
@@ -470,8 +470,8 @@ static const unsigned int gfx11_SECT_CONTEXT_def_2[] =
0x00000000, // SPI_BARYC_CNTL
0, // HOLE
0x00000000, // SPI_TMPRING_SIZE
- 0, // HOLE
- 0, // HOLE
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_LO
+ 0x00000000, // SPI_GFX_SCRATCH_BASE_HI
0, // HOLE
0, // HOLE
0, // HOLE
@@ -545,7 +545,7 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
0x00000000, // PA_STEREO_CNTL
0x00000000, // PA_STATE_STEREO_X
0x00000000, // PA_CL_VRS_CNTL
- 0x00000000, // PA_SIDEBAND_REQUEST_DELAYS
+ 0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
@@ -658,30 +658,30 @@ static const unsigned int gfx11_SECT_CONTEXT_def_4[] =
0x00000000, // PA_SU_POINT_MINMAX
0x00000000, // PA_SU_LINE_CNTL
0x00000000, // PA_SC_LINE_STIPPLE
- 0x00000000, // VGT_OUTPUT_PATH_CNTL
- 0x00000000, // VGT_HOS_CNTL
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // VGT_HOS_MAX_TESS_LEVEL
0x00000000, // VGT_HOS_MIN_TESS_LEVEL
- 0x00000000, // VGT_HOS_REUSE_DEPTH
- 0x00000000, // VGT_GROUP_PRIM_TYPE
- 0x00000000, // VGT_GROUP_FIRST_DECR
- 0x00000000, // VGT_GROUP_DECR
- 0x00000000, // VGT_GROUP_VECT_0_CNTL
- 0x00000000, // VGT_GROUP_VECT_1_CNTL
- 0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
- 0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
- 0x00000000, // VGT_GS_MODE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // VGT_GS_ONCHIP_CNTL
0x00000000, // PA_SC_MODE_CNTL_0
0x00000000, // PA_SC_MODE_CNTL_1
0x00000000, // VGT_ENHANCE
- 0x00000100, // VGT_GS_PER_ES
- 0x00000080, // VGT_ES_PER_GS
- 0x00000002, // VGT_GS_PER_VS
- 0x00000000, // VGT_GSVS_RING_OFFSET_1
- 0x00000000, // VGT_GSVS_RING_OFFSET_2
- 0x00000000, // VGT_GSVS_RING_OFFSET_3
- 0x00000000, // VGT_GS_OUT_PRIM_TYPE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // IA_ENHANCE
};
static const unsigned int gfx11_SECT_CONTEXT_def_5[] =
@@ -695,37 +695,36 @@ static const unsigned int gfx11_SECT_CONTEXT_def_6[] =
};
static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
{
- 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
0x00000000, // VGT_DRAW_PAYLOAD_CNTL
0, // HOLE
- 0x00000000, // VGT_INSTANCE_STEP_RATE_0
- 0x00000000, // VGT_INSTANCE_STEP_RATE_1
- 0x000000ff, // IA_MULTI_VGT_PARAM
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // VGT_ESGS_RING_ITEMSIZE
- 0x00000000, // VGT_GSVS_RING_ITEMSIZE
+ 0, // HOLE
0x00000000, // VGT_REUSE_OFF
- 0x00000000, // VGT_VTX_CNT_EN
+ 0, // HOLE
0x00000000, // DB_HTILE_SURFACE
0x00000000, // DB_SRESULTS_COMPARE_STATE0
0x00000000, // DB_SRESULTS_COMPARE_STATE1
0x00000000, // DB_PRELOAD_CONTROL
0, // HOLE
- 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
- 0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
0, // HOLE
- 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
- 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
- 0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
0, // HOLE
- 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
- 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
- 0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
0, // HOLE
- 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
- 0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
- 0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
0, // HOLE
- 0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0, // HOLE
0, // HOLE
0, // HOLE
@@ -745,10 +744,10 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
0x00000000, // VGT_TESS_DISTRIBUTION
0x00000000, // VGT_SHADER_STAGES_EN
0x00000000, // VGT_LS_HS_CONFIG
- 0x00000000, // VGT_GS_VERT_ITEMSIZE
- 0x00000000, // VGT_GS_VERT_ITEMSIZE_1
- 0x00000000, // VGT_GS_VERT_ITEMSIZE_2
- 0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // VGT_TF_PARAM
0x00000000, // DB_ALPHA_TO_MASK
0, // HOLE
@@ -759,11 +758,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_7[] =
0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
0x00000000, // VGT_GS_INSTANCE_CNT
- 0x00000000, // VGT_STRMOUT_CONFIG
- 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
-};
-static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
-{
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // PA_SC_CENTROID_PRIORITY_0
0x00000000, // PA_SC_CENTROID_PRIORITY_1
0x00001000, // PA_SC_LINE_CNTL
@@ -797,126 +807,126 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
0x00100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
0x00000000, // PA_SC_NGG_MODE_CNTL
0x00000000, // PA_SC_BINNER_CNTL_2
- 0x0000001e, // VGT_VERTEX_REUSE_BLOCK_CNTL
- 0x00000020, // VGT_OUT_DEALLOC_CNTL
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR0_BASE
- 0x00000000, // CB_COLOR0_PITCH
- 0x00000000, // CB_COLOR0_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR0_VIEW
0x00000000, // CB_COLOR0_INFO
0x00000000, // CB_COLOR0_ATTRIB
- 0x00000000, // CB_COLOR0_DCC_CONTROL
- 0x00000000, // CB_COLOR0_CMASK
- 0x00000000, // CB_COLOR0_CMASK_SLICE
- 0x00000000, // CB_COLOR0_FMASK
- 0x00000000, // CB_COLOR0_FMASK_SLICE
- 0x00000000, // CB_COLOR0_CLEAR_WORD0
- 0x00000000, // CB_COLOR0_CLEAR_WORD1
+ 0x00000000, // CB_COLOR0_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR0_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR1_BASE
- 0x00000000, // CB_COLOR1_PITCH
- 0x00000000, // CB_COLOR1_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR1_VIEW
0x00000000, // CB_COLOR1_INFO
0x00000000, // CB_COLOR1_ATTRIB
- 0x00000000, // CB_COLOR1_DCC_CONTROL
- 0x00000000, // CB_COLOR1_CMASK
- 0x00000000, // CB_COLOR1_CMASK_SLICE
- 0x00000000, // CB_COLOR1_FMASK
- 0x00000000, // CB_COLOR1_FMASK_SLICE
- 0x00000000, // CB_COLOR1_CLEAR_WORD0
- 0x00000000, // CB_COLOR1_CLEAR_WORD1
+ 0x00000000, // CB_COLOR1_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR1_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR2_BASE
- 0x00000000, // CB_COLOR2_PITCH
- 0x00000000, // CB_COLOR2_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR2_VIEW
0x00000000, // CB_COLOR2_INFO
0x00000000, // CB_COLOR2_ATTRIB
- 0x00000000, // CB_COLOR2_DCC_CONTROL
- 0x00000000, // CB_COLOR2_CMASK
- 0x00000000, // CB_COLOR2_CMASK_SLICE
- 0x00000000, // CB_COLOR2_FMASK
- 0x00000000, // CB_COLOR2_FMASK_SLICE
- 0x00000000, // CB_COLOR2_CLEAR_WORD0
- 0x00000000, // CB_COLOR2_CLEAR_WORD1
+ 0x00000000, // CB_COLOR2_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR2_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR3_BASE
- 0x00000000, // CB_COLOR3_PITCH
- 0x00000000, // CB_COLOR3_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR3_VIEW
0x00000000, // CB_COLOR3_INFO
0x00000000, // CB_COLOR3_ATTRIB
- 0x00000000, // CB_COLOR3_DCC_CONTROL
- 0x00000000, // CB_COLOR3_CMASK
- 0x00000000, // CB_COLOR3_CMASK_SLICE
- 0x00000000, // CB_COLOR3_FMASK
- 0x00000000, // CB_COLOR3_FMASK_SLICE
- 0x00000000, // CB_COLOR3_CLEAR_WORD0
- 0x00000000, // CB_COLOR3_CLEAR_WORD1
+ 0x00000000, // CB_COLOR3_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR3_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR4_BASE
- 0x00000000, // CB_COLOR4_PITCH
- 0x00000000, // CB_COLOR4_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR4_VIEW
0x00000000, // CB_COLOR4_INFO
0x00000000, // CB_COLOR4_ATTRIB
- 0x00000000, // CB_COLOR4_DCC_CONTROL
- 0x00000000, // CB_COLOR4_CMASK
- 0x00000000, // CB_COLOR4_CMASK_SLICE
- 0x00000000, // CB_COLOR4_FMASK
- 0x00000000, // CB_COLOR4_FMASK_SLICE
- 0x00000000, // CB_COLOR4_CLEAR_WORD0
- 0x00000000, // CB_COLOR4_CLEAR_WORD1
+ 0x00000000, // CB_COLOR4_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR4_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR5_BASE
- 0x00000000, // CB_COLOR5_PITCH
- 0x00000000, // CB_COLOR5_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR5_VIEW
0x00000000, // CB_COLOR5_INFO
0x00000000, // CB_COLOR5_ATTRIB
- 0x00000000, // CB_COLOR5_DCC_CONTROL
- 0x00000000, // CB_COLOR5_CMASK
- 0x00000000, // CB_COLOR5_CMASK_SLICE
- 0x00000000, // CB_COLOR5_FMASK
- 0x00000000, // CB_COLOR5_FMASK_SLICE
- 0x00000000, // CB_COLOR5_CLEAR_WORD0
- 0x00000000, // CB_COLOR5_CLEAR_WORD1
+ 0x00000000, // CB_COLOR5_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR5_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR6_BASE
- 0x00000000, // CB_COLOR6_PITCH
- 0x00000000, // CB_COLOR6_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR6_VIEW
0x00000000, // CB_COLOR6_INFO
0x00000000, // CB_COLOR6_ATTRIB
- 0x00000000, // CB_COLOR6_DCC_CONTROL
- 0x00000000, // CB_COLOR6_CMASK
- 0x00000000, // CB_COLOR6_CMASK_SLICE
- 0x00000000, // CB_COLOR6_FMASK
- 0x00000000, // CB_COLOR6_FMASK_SLICE
- 0x00000000, // CB_COLOR6_CLEAR_WORD0
- 0x00000000, // CB_COLOR6_CLEAR_WORD1
+ 0x00000000, // CB_COLOR6_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR6_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR7_BASE
- 0x00000000, // CB_COLOR7_PITCH
- 0x00000000, // CB_COLOR7_SLICE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR7_VIEW
0x00000000, // CB_COLOR7_INFO
0x00000000, // CB_COLOR7_ATTRIB
- 0x00000000, // CB_COLOR7_DCC_CONTROL
- 0x00000000, // CB_COLOR7_CMASK
- 0x00000000, // CB_COLOR7_CMASK_SLICE
- 0x00000000, // CB_COLOR7_FMASK
- 0x00000000, // CB_COLOR7_FMASK_SLICE
- 0x00000000, // CB_COLOR7_CLEAR_WORD0
- 0x00000000, // CB_COLOR7_CLEAR_WORD1
+ 0x00000000, // CB_COLOR7_FDCC_CONTROL
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR7_DCC_BASE
0, // HOLE
0x00000000, // CB_COLOR0_BASE_EXT
@@ -927,22 +937,22 @@ static const unsigned int gfx11_SECT_CONTEXT_def_8[] =
0x00000000, // CB_COLOR5_BASE_EXT
0x00000000, // CB_COLOR6_BASE_EXT
0x00000000, // CB_COLOR7_BASE_EXT
- 0x00000000, // CB_COLOR0_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR1_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR2_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR3_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR4_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR5_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR6_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR7_CMASK_BASE_EXT
- 0x00000000, // CB_COLOR0_FMASK_BASE_EXT
- 0x00000000, // CB_COLOR1_FMASK_BASE_EXT
- 0x00000000, // CB_COLOR2_FMASK_BASE_EXT
- 0x00000000, // CB_COLOR3_FMASK_BASE_EXT
- 0x00000000, // CB_COLOR4_FMASK_BASE_EXT
- 0x00000000, // CB_COLOR5_FMASK_BASE_EXT
- 0x00000000, // CB_COLOR6_FMASK_BASE_EXT
- 0x00000000, // CB_COLOR7_FMASK_BASE_EXT
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
+ 0, // HOLE
0x00000000, // CB_COLOR0_DCC_BASE_EXT
0x00000000, // CB_COLOR1_DCC_BASE_EXT
0x00000000, // CB_COLOR2_DCC_BASE_EXT
@@ -976,8 +986,7 @@ static const struct cs_extent_def gfx11_SECT_CONTEXT_defs[] =
{gfx11_SECT_CONTEXT_def_4, 0x0000a1ff, 158 },
{gfx11_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
{gfx11_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
- {gfx11_SECT_CONTEXT_def_7, 0x0000a2a5, 66 },
- {gfx11_SECT_CONTEXT_def_8, 0x0000a2f5, 203 },
+ {gfx11_SECT_CONTEXT_def_7, 0x0000a2a6, 282 },
{ 0, 0, 0 }
};
static const struct cs_section_def gfx11_cs_data[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 982855e6cf52..b1c44fab074f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
- WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 84440741c60b..a22b45c92792 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
- WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
+ WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@@ -2693,7 +2693,11 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ if (adev->asic_type == CHIP_HAWAII)
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
+ else
+ adev_to_drm(adev)->mode_config.prefer_shadow = 1;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index c5f46d264b23..a3cd5c1e8529 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -53,7 +53,7 @@
* 2. Async ring
*/
#define GFX10_NUM_GFX_RINGS_NV1X 1
-#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1
+#define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 2
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@@ -3780,11 +3780,12 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;
- WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
+ WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@@ -3793,13 +3794,13 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
}
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
+ amdgpu_ring_write(ring, scratch -
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
+ tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
if (amdgpu_emu_mode == 1)
@@ -3975,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
}
+static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev)
+{
+ const struct rlc_firmware_header_v2_4 *rlc_hdr;
+
+ rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
+ adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
+ adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
+}
+
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
bool ret = false;
@@ -4152,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
if (version_major == 2) {
if (version_minor >= 1)
gfx_v10_0_init_rlc_ext_microcode(adev);
- if (version_minor == 2)
+ if (version_minor >= 2)
gfx_v10_0_init_rlc_iram_dram_microcode(adev);
+ if (version_minor == 4) {
+ gfx_v10_0_init_tap_delays_microcode(adev);
+ }
}
}
@@ -4250,6 +4271,47 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
}
+
+ }
+
+ if (adev->gfx.rlc.global_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se0_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se1_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se2_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
+ }
+
+ if (adev->gfx.rlc.se3_tap_delays_ucode_size_bytes) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
+ info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
+ info->fw = adev->gfx.rlc_fw;
+ adev->firmware.fw_size +=
+ ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
}
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
@@ -4711,6 +4773,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
{
struct amdgpu_ring *ring;
unsigned int irq_type;
+ unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@@ -4728,8 +4791,10 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
+ hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
+ AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
+ hw_prio, NULL);
}
static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@@ -6581,6 +6646,24 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring)
}
}
+static void gfx_v10_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
+ struct v10_gfx_mqd *mqd,
+ struct amdgpu_mqd_prop *prop)
+{
+ bool priority = false;
+ u32 tmp;
+
+ /* set up default queue priority level
+ * 0x0 = low priority, 0x1 = high priority
+ */
+ if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
+ priority = true;
+
+ tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
+ tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
+ mqd->cp_gfx_hqd_queue_priority = tmp;
+}
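The priority written here is expected to come from the hw_prio now chosen in gfx_v10_0_gfx_ring_init() above: the common ring code fills struct amdgpu_mqd_prop from the ring, so the chain is roughly (a sketch of the data flow, not literal code):

    /*
     * ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH
     *   -> prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH
     *   -> CP_GFX_HQD_QUEUE_PRIORITY.PRIORITY_LEVEL = 1 in the gfx MQD
     */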
+
static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@@ -6609,11 +6692,8 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
- /* set up default queue priority level
- * 0x0 = low priority, 0x1 = high priority */
- tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY);
- tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
- mqd->cp_gfx_hqd_queue_priority = tmp;
+ /* set up gfx queue priority */
+ gfx_v10_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_QUANTUM);
@@ -8506,14 +8586,45 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
+ uint64_t wptr_tmp;
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume the doorbell is always used by the MES-mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
} else {
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
}
}
@@ -8538,13 +8649,42 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
+ uint64_t wptr_tmp;
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume the doorbell is always used by the MES-mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
} else {
- BUG(); /* only DOORBELL method supported on gfx10 now */
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx10 now */
+ }
}
}
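Both MES paths above assume two extra dwords live directly behind the hardware MQD in the ring's MQD buffer; a sketch of that layout (offsets are the ones used in the code, and the unmap flag is presumed to be set while MES has the queue unmapped):

    /*
     * ring->mqd_ptr
     *   +----------------------------+  offset 0
     *   | hardware MQD (mqd_size)    |
     *   +----------------------------+  mqd_size
     *   | u32 wptr_saved             |  last write pointer the driver set
     *   +----------------------------+  mqd_size + 4
     *   | u32 is_queue_unmap         |  nonzero while the queue is unmapped
     *   +----------------------------+
     *
     * When is_queue_unmap is nonzero the aggregated doorbell is rung as
     * well, so MES notices pending work for the unmapped queue.
     */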
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a4a6751b1e44..f6b1bb40e503 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -53,9 +53,12 @@
#define GFX11_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388
#define regCGTT_WD_CLK_CTRL 0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX 1
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e
+#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1
MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
@@ -126,6 +129,10 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub, uint8_t dst_sel);
+static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
+static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
+ bool enable);
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@@ -1134,6 +1141,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
.init_spm_golden = &gfx_v11_0_init_spm_golden_registers,
+ .update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
};
static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
@@ -2763,7 +2771,13 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
for (i = 0; i < adev->usec_timeout; i++) {
cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
- bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
+ bootload_status = RREG32_SOC15(GC, 0,
+ regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
+ else
+ bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
+
if ((cp_status == 0) &&
(REG_GET_FIELD(bootload_status,
RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
@@ -4563,6 +4577,9 @@ static int gfx_v11_0_hw_init(void *handle)
if (adev->gfx.imu.funcs->start_imu)
adev->gfx.imu.funcs->start_imu(adev);
}
+
+ /* disable gpa mode in backdoor loading */
+ gfx_v11_0_disable_gpa_mode(adev);
}
}
@@ -4740,65 +4757,143 @@ static int gfx_v11_0_soft_reset(void *handle)
{
u32 grbm_soft_reset = 0;
u32 tmp;
+ int i, j, k;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- /* GRBM_STATUS */
- tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS);
- if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
- GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
- GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
- GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
- GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) {
- grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
- GRBM_SOFT_RESET, SOFT_RESET_CP,
- 1);
- grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
- GRBM_SOFT_RESET, SOFT_RESET_GFX,
- 1);
- }
-
- if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
- grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
- GRBM_SOFT_RESET, SOFT_RESET_CP,
- 1);
- }
-
- /* GRBM_STATUS2 */
- tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2);
- if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
- grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
- GRBM_SOFT_RESET,
- SOFT_RESET_RLC,
- 1);
-
- if (grbm_soft_reset) {
- /* stop the rlc */
- gfx_v11_0_rlc_stop(adev);
-
- /* Disable GFX parsing/prefetching */
- gfx_v11_0_cp_gfx_enable(adev, false);
-
- /* Disable MEC parsing/prefetching */
- gfx_v11_0_cp_compute_enable(adev, false);
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
- if (grbm_soft_reset) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ gfx_v11_0_set_safe_mode(adev);
- udelay(50);
+ for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+ for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
+ WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
+ }
+ }
+ }
+ for (i = 0; i < adev->gfx.me.num_me; ++i) {
+ for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
+ for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
+ tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
+ tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
+ WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
- tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
+ }
}
+ }
+
+ WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
+
+ // Read the CP_VMID_RESET register three times
+ // to allow sufficient time for GFX_HQD_ACTIVE to reach 0
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
+ RREG32_SOC15(GC, 0, regCP_VMID_RESET);
- /* Wait a little for things to settle down */
- udelay(50);
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
+ !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
+ break;
+ udelay(1);
}
- return 0;
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait all pipes clean\n");
+ return -EINVAL;
+ }
+
+ /********** trigger soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 1);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 1);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+ /********** exit soft reset ***********/
+ grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CP, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_GFX, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPF, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPC, 0);
+ grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
+ SOFT_RESET_CPG, 0);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
+
+ tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
+ WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
+
+ WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
+ WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
+ break;
+ udelay(1);
+ }
+ if (i >= adev->usec_timeout) {
+ printk("Failed to wait CP_VMID_RESET to 0\n");
+ return -EINVAL;
+ }
+
+ tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
+ tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
+
+ gfx_v11_0_unset_safe_mode(adev);
+
+ return gfx_v11_0_cp_resume(adev);
+}
+
+static bool gfx_v11_0_check_soft_reset(void *handle)
+{
+ int i, r;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ ring = &adev->gfx.compute_ring[i];
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
}
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
@@ -5090,9 +5185,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
- data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
} else {
/* Program RLC_CGCG_CGLS_CTRL */
def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
@@ -5121,9 +5219,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
- data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ /* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
+ if (adev->sdma.num_instances > 1) {
+ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
+ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
+ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
+ }
}
}
@@ -5188,6 +5289,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
.update_spm_vmid = gfx_v11_0_update_spm_vmid,
};
+static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
+
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
+
+ // Program RLC_PG_DELAY3 for CGPG hysteresis
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 1):
+ WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
+{
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
+
+ gfx_v11_cntl_power_gating(adev, enable);
+
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
+}
+
static int gfx_v11_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
@@ -5202,6 +5335,10 @@ static int gfx_v11_0_set_powergating_state(void *handle,
case IP_VERSION(11, 0, 2):
amdgpu_gfx_off_ctrl(adev, enable);
break;
+ case IP_VERSION(11, 0, 1):
+ gfx_v11_cntl_pg(adev, enable);
+ amdgpu_gfx_off_ctrl(adev, enable);
+ break;
default:
break;
}
@@ -5219,6 +5356,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
gfx_v11_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE);
@@ -5296,14 +5434,45 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
+ uint64_t wptr_tmp;
- if (ring->use_doorbell) {
- /* XXX check if swapping is necessary on BE */
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ ring->hw_prio);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume the doorbell is always used by the MES-mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
} else {
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
+ if (ring->use_doorbell) {
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
+ lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
+ upper_32_bits(ring->wptr));
+ }
}
}
@@ -5328,13 +5497,42 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
+ uint64_t wptr_tmp;
- /* XXX check if swapping is necessary on BE */
- if (ring->use_doorbell) {
- atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
- WDOORBELL64(ring->doorbell_index, ring->wptr);
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ ring->hw_prio);
+
+ wptr_tmp = ring->wptr & ring->buf_mask;
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
+ *wptr_saved = wptr_tmp;
+ /* assume the doorbell is always used by the MES-mapped queue */
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+ } else {
+ WDOORBELL64(ring->doorbell_index, wptr_tmp);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index, wptr_tmp);
+ }
} else {
- BUG(); /* only DOORBELL method supported on gfx11 now */
+ /* XXX check if swapping is necessary on BE */
+ if (ring->use_doorbell) {
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr);
+ WDOORBELL64(ring->doorbell_index, ring->wptr);
+ } else {
+ BUG(); /* only DOORBELL method supported on gfx11 now */
+ }
}
}
@@ -6131,6 +6329,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.is_idle = gfx_v11_0_is_idle,
.wait_for_idle = gfx_v11_0_wait_for_idle,
.soft_reset = gfx_v11_0_soft_reset,
+ .check_soft_reset = gfx_v11_0_check_soft_reset,
.set_clockgating_state = gfx_v11_0_set_clockgating_state,
.set_powergating_state = gfx_v11_0_set_powergating_state,
.get_clockgating_state = gfx_v11_0_get_clockgating_state,
@@ -6293,6 +6492,11 @@ static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
{
+ if (adev->flags & AMD_IS_APU)
+ adev->gfx.imu.mode = MISSION_MODE;
+ else
+ adev->gfx.imu.mode = DEBUG_MODE;
+
adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e3..fc9c1043244c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;
- WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
+ WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
- PACKET3_SET_UCONFIG_REG_START);
+ amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
+ tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
@@ -2587,7 +2587,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
gfx_v9_0_tiling_mode_table_init(adev);
- gfx_v9_0_setup_rb(adev);
+ if (adev->gfx.num_gfx_rings)
+ gfx_v9_0_setup_rb(adev);
gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
index 5eccaa2c7ca0..0e13370c2057 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c
@@ -26,13 +26,10 @@
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
+#include "gc/gc_11_0_0_default.h"
#include "navi10_enum.h"
#include "soc15_common.h"
-#define regGCVM_L2_CNTL3_DEFAULT 0x80100007
-#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
-#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
-
static const char *gfxhub_client_ids[] = {
"CB/DB",
"Reserved",
@@ -414,12 +411,39 @@ static void gfxhub_v3_0_set_fault_enable_default(struct amdgpu_device *adev,
{
u32 tmp;
+ /* Do not halt CP on page fault */
+ tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
+ tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
+ WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
+
+ /**
+ * Set GRBM_GFX_INDEX in broadcast mode
+ * before programming GL1C_UTCL0_CNTL1 and SQG_CONFIG
+ */
+ WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, regGRBM_GFX_INDEX_DEFAULT);
+
+ /**
+ * Retry respond mode: RETRY
+ * Error (no retry) respond mode: SUCCESS
+ */
+ tmp = RREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1);
+ tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_MODE, 0);
+ tmp = REG_SET_FIELD(tmp, GL1C_UTCL0_CNTL1, RESP_FAULT_MODE, 0x2);
+ WREG32_SOC15(GC, 0, regGL1C_UTCL0_CNTL1, tmp);
+
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
+ /* Disable SQ XNACK interrupt for all VMIDs */
+ tmp = RREG32_SOC15(GC, 0, regSQG_CONFIG);
+ tmp = REG_SET_FIELD(tmp, SQG_CONFIG, XNACK_INTR_MASK,
+ SQG_CONFIG__XNACK_INTR_MASK_MASK >>
+ SQG_CONFIG__XNACK_INTR_MASK__SHIFT);
+ WREG32_SOC15(GC, 0, regSQG_CONFIG, tmp);
+
tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9077dfccaf3c..f513e2c2e964 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -22,6 +22,9 @@
*/
#include <linux/firmware.h>
#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v10_0.h"
@@ -416,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
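+ /* SRIOV VFs use the extended SRIOV_USEC_TIMEOUT for the KIQ fence wait */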
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -434,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
return -ETIME;
@@ -456,7 +460,8 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
gmc_v10_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB_0, flush_type);
}
- break;
+ if (!adev->enable_mes)
+ break;
}
}
@@ -833,10 +838,21 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
- if (amdgpu_gart_size == -1)
- adev->gmc.gart_size = 512ULL << 20;
- else
+ if (amdgpu_gart_size == -1) {
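+ /* Use a larger 1GB default GART on parts with scatter/gather display support */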
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ default:
+ adev->gmc.gart_size = 512ULL << 20;
+ break;
+ case IP_VERSION(10, 3, 1): /* DCE SG support */
+ case IP_VERSION(10, 3, 3): /* DCE SG support */
+ case IP_VERSION(10, 3, 6): /* DCE SG support */
+ case IP_VERSION(10, 3, 7): /* DCE SG support */
+ adev->gmc.gart_size = 1024ULL << 20;
+ break;
+ }
+ } else {
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
+ }
gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
@@ -968,6 +984,8 @@ static int gmc_v10_0_sw_init(void *handle)
return r;
}
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
r = gmc_v10_0_mc_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 7f4b480ae66e..1471bfb9ae38 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -22,10 +22,13 @@
*/
#include <linux/firmware.h>
#include <linux/pci.h>
+
+#include <drm/drm_cache.h>
+
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v11_0.h"
-#include "umc_v8_7.h"
+#include "umc_v8_10.h"
#include "athub/athub_3_0_0_sh_mask.h"
#include "athub/athub_3_0_0_offset.h"
#include "oss/osssys_6_0_0_offset.h"
@@ -37,6 +40,7 @@
#include "nbio_v4_3.h"
#include "gfxhub_v3_0.h"
#include "mmhub_v3_0.h"
+#include "mmhub_v3_0_1.h"
#include "mmhub_v3_0_2.h"
#include "athub_v3_0.h"
@@ -267,7 +271,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
- if (adev->gfx.kiq.ring.sched.ready && !adev->enable_mes &&
+ if ((adev->gfx.kiq.ring.sched.ready || adev->mes.ring.sched.ready) &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned eng = 17;
@@ -343,7 +347,6 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
gmc_v11_0_flush_gpu_tlb(adev, vmid,
AMDGPU_GFXHUB_0, flush_type);
}
- break;
}
}
@@ -537,17 +540,45 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[UMC_HWIP][0]) {
case IP_VERSION(8, 10, 0):
+ adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
+ adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
+ adev->umc.node_inst_num = adev->gmc.num_umc;
+ adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+ adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
+ adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];
+ adev->umc.ras = &umc_v8_10_ras;
+ break;
case IP_VERSION(8, 11, 0):
break;
default:
break;
}
+
+ if (adev->umc.ras) {
+ amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
+
+ strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
+ adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
+ adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
+
+ /* If no special ras_late_init function is defined, use the default ras_late_init */
+ if (!adev->umc.ras->ras_block.ras_late_init)
+ adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
+
+ /* If no special ras_cb function is defined, use the default ras_cb */
+ if (!adev->umc.ras->ras_block.ras_cb)
+ adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
+ }
}
static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ adev->mmhub.funcs = &mmhub_v3_0_1_funcs;
+ break;
case IP_VERSION(3, 0, 2):
adev->mmhub.funcs = &mmhub_v3_0_2_funcs;
break;
@@ -747,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle)
return r;
}
+ adev->need_swiotlb = drm_need_swiotlb(44);
+
r = gmc_v11_0_mc_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 22761a3bb818..4603653916f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
uint32_t seq;
uint16_t queried_pasid;
bool ret;
+ u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
- r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
if (r < 1) {
dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
up_read(&adev->reset_domain->sem);
@@ -1624,12 +1625,15 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
else
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 1):
adev->num_vmhubs = 3;
/* Keep the vm size same with Vega20 */
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
+ adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
index 39a696cd45b5..29c3484ae1f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
@@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
0);
}
+static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+ uint32_t hdp_mem_pwr_cntl;
+
+ if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+
+ /* Force the MEM clock on before switching clock/power mode */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 1);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+
+ /* disable clock and power gating before any changing */
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 0);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 0);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+
+ /* Already disabled above. The actions below are for "enabled" only */
+ if (enable) {
+ /* only one clock gating mode (LS/DS/SD) can be enabled */
+ if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_SD_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_SD_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_LS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_LS_EN, 1);
+ } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_DS_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+ HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_DS_EN, 1);
+ }
+
+ /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */
+ if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+ AMD_CG_SUPPORT_HDP_SD)) {
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ ATOMIC_MEM_POWER_CTRL_EN, 1);
+ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+ RC_MEM_POWER_CTRL_EN, 1);
+ WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+ }
+ }
+
+ /* Disable the MEM clock override after the clock/power mode change */
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0);
+ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+ RC_MEM_CLK_SOFT_OVERRIDE, 0);
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t hdp_clk_cntl;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
+ return;
+
+ hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+
+ if (enable) {
+ hdp_clk_cntl &=
+ ~(uint32_t)
+ (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK);
+ } else {
+ hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK;
+ }
+
+ WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl);
+}
+
+static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t tmp;
+
+ /* AMD_CG_SUPPORT_HDP_MGCG */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL);
+ if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK |
+ HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK)))
+ *flags |= AMD_CG_SUPPORT_HDP_MGCG;
+
+ /* AMD_CG_SUPPORT_HDP_LS/DS/SD */
+ tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL);
+ if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_LS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_DS;
+ else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_HDP_SD;
+}
+
+static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ hdp_v5_2_update_mem_power_gating(adev, enable);
+ hdp_v5_2_update_medium_grain_clock_gating(adev, enable);
+}
+
const struct amdgpu_hdp_funcs hdp_v5_2_funcs = {
.flush_hdp = hdp_v5_2_flush_hdp,
+ .update_clock_gating = hdp_v5_2_update_clock_gating,
+ .get_clock_gating_state = hdp_v5_2_get_clockgating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index 92dc60a9d209..085e613f3646 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
.get_wptr = ih_v6_0_get_wptr,
.decode_iv = amdgpu_ih_decode_iv_helper,
+ .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
.set_rptr = ih_v6_0_set_rptr
};
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index d63d3f2b8a16..76383baa3929 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -24,6 +24,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_imu.h"
+#include "amdgpu_dpm.h"
#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
@@ -117,6 +118,25 @@ static int imu_v11_0_load_microcode(struct amdgpu_device *adev)
return 0;
}
+static int imu_v11_0_wait_for_reset_status(struct amdgpu_device *adev)
+{
+ int i, imu_reg_val = 0;
+
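+ /* Poll until the low five status bits of GFX_IMU_GFX_RESET_CTRL are all set */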
+ for (i = 0; i < adev->usec_timeout; i++) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
+ if ((imu_reg_val & 0x1f) == 0x1f)
+ break;
+ udelay(1);
+ }
+
+ if (i >= adev->usec_timeout) {
+ dev_err(adev->dev, "init imu: IMU start timeout\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
static void imu_v11_0_setup(struct amdgpu_device *adev)
{
int imu_reg_val;
@@ -125,9 +145,11 @@ static void imu_v11_0_setup(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
- imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
- imu_reg_val |= 0x1;
- WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
+ if (adev->gfx.imu.mode == DEBUG_MODE) {
+ imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
+ imu_reg_val |= 0x1;
+ WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
+ }
//disable imu Rtavfs, SmsRepair, DfllBTC, and ClkB
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
@@ -137,26 +159,17 @@ static void imu_v11_0_setup(struct amdgpu_device *adev)
static int imu_v11_0_start(struct amdgpu_device *adev)
{
- int imu_reg_val, i;
+ int imu_reg_val;
//Start IMU by set GFX_IMU_CORE_CTRL.CRESET = 0
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
imu_reg_val &= 0xfffffffe;
WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
- for (i = 0; i < adev->usec_timeout; i++) {
- imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
- if ((imu_reg_val & 0x1f) == 0x1f)
- break;
- udelay(1);
- }
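+ /* On APUs, request GFX power-up through the IMU before waiting for the reset status */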
+ if (adev->flags & AMD_IS_APU)
+ amdgpu_dpm_set_gfx_power_up_by_imu(adev);
- if (i >= adev->usec_timeout) {
- dev_err(adev->dev, "init imu: IMU start timeout\n");
- return -ETIMEDOUT;
- }
-
- return 0;
+ return imu_v11_0_wait_for_reset_status(adev);
}
static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11[] =
@@ -364,4 +377,5 @@ const struct amdgpu_imu_funcs gfx_v11_0_imu_funcs = {
.setup_imu = imu_v11_0_setup,
.start_imu = imu_v11_0_start,
.program_rlc_ram = imu_v11_0_program_rlc_ram,
+ .wait_for_reset_status = imu_v11_0_wait_for_reset_status,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 18a129f36215..067d10073a56 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -87,21 +87,32 @@ static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
};
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
- void *pkt, int size)
+ void *pkt, int size,
+ int api_status_off)
{
int ndw = size / 4;
signed long r;
union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
+ unsigned long flags;
BUG_ON(size % 4 != 0);
- if (amdgpu_ring_alloc(ring, ndw))
+ spin_lock_irqsave(&mes->ring_lock, flags);
+ if (amdgpu_ring_alloc(ring, ndw)) {
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
return -ENOMEM;
+ }
+
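+ /* Patch the completion fence address and value into the packet's API status before submission */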
+ api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+ api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
+ api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
amdgpu_ring_write_multiple(ring, pkt, ndw);
amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
@@ -166,13 +177,9 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
- mes_add_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_add_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
}
static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
@@ -189,13 +196,9 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
@@ -227,13 +230,9 @@ static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
}
- mes_remove_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
@@ -258,13 +257,9 @@ static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
- mes_status_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_status_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_status_pkt, sizeof(mes_status_pkt));
+ &mes_status_pkt, sizeof(mes_status_pkt),
+ offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}
static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
@@ -299,7 +294,7 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
mes_set_hw_res_pkt.aggregated_doorbells[i] =
- mes->agreegated_doorbells[i];
+ mes->aggregated_doorbells[i];
for (i = 0; i < 5; i++) {
mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
@@ -313,13 +308,63 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
- mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_set_hw_res_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
return mes_v10_1_submit_pkt_and_poll_completion(mes,
- &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
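+ /* Program each priority level's aggregated doorbell offset into its CP_MES_DOORBELL_CONTROLn register and enable it */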
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data);
+
+ data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data);
+
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data);
}
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
@@ -1121,6 +1166,8 @@ static int mes_v10_1_hw_init(void *handle)
if (r)
goto failure;
+ mes_v10_1_init_aggregated_doorbell(&adev->mes);
+
r = mes_v10_1_query_sched_status(&adev->mes);
if (r) {
DRM_ERROR("MES is busy\n");
@@ -1133,6 +1180,7 @@ static int mes_v10_1_hw_init(void *handle)
* with MES enabled.
*/
adev->gfx.kiq.ring.sched.ready = false;
+ adev->mes.ring.sched.ready = true;
return 0;
@@ -1145,6 +1193,8 @@ static int mes_v10_1_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->mes.ring.sched.ready = false;
+
mes_v10_1_enable(adev, false);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
@@ -1183,7 +1233,8 @@ static int mes_v10_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_mes_self_test(adev);
+ if (!amdgpu_in_reset(adev))
+ amdgpu_mes_self_test(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 7eee004cf3ce..cc3fdbbcd314 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -86,21 +86,32 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
};
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
- void *pkt, int size)
+ void *pkt, int size,
+ int api_status_off)
{
int ndw = size / 4;
signed long r;
union MESAPI__ADD_QUEUE *x_pkt = pkt;
+ struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
+ unsigned long flags;
BUG_ON(size % 4 != 0);
- if (amdgpu_ring_alloc(ring, ndw))
+ spin_lock_irqsave(&mes->ring_lock, flags);
+ if (amdgpu_ring_alloc(ring, ndw)) {
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
return -ENOMEM;
+ }
+
+ api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
+ api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
+ api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
amdgpu_ring_write_multiple(ring, pkt, ndw);
amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
@@ -156,7 +167,13 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
input->gang_global_priority_level;
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
- mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+
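+ /* MES scheduler API version 2 and later expect the queue wptr as an MC address */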
+ if (((adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >>
+ AMDGPU_MES_API_VERSION_SHIFT) >= 2)
+ mes_add_queue_pkt.wptr_addr = input->wptr_mc_addr;
+ else
+ mes_add_queue_pkt.wptr_addr = input->wptr_addr;
+
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.paging = input->paging;
@@ -165,14 +182,12 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
-
- mes_add_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_add_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
+ mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
+ mes_add_queue_pkt.trap_en = 1;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
- &mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
+ &mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
+ offsetof(union MESAPI__ADD_QUEUE, api_status));
}
static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
@@ -189,13 +204,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
return mes_v11_0_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
@@ -209,7 +220,7 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
- mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset << 2;
+ mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = 0;
mes_remove_queue_pkt.pipe_id = input->pipe_id;
@@ -221,19 +232,14 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.tf_data =
lower_32_bits(input->trail_fence_data);
} else {
- if (input->queue_type == AMDGPU_RING_TYPE_GFX)
- mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
- else
- mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
+ mes_remove_queue_pkt.unmap_legacy_queue = 1;
+ mes_remove_queue_pkt.queue_type =
+ convert_to_mes_queue_type(input->queue_type);
}
- mes_remove_queue_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_remove_queue_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
-
return mes_v11_0_submit_pkt_and_poll_completion(mes,
- &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
+ &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
+ offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
@@ -258,13 +264,57 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
- mes_status_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_status_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_status_pkt, sizeof(mes_status_pkt),
+ offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
+}
+
+static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
+ struct mes_misc_op_input *input)
+{
+ union MESAPI__MISC misc_pkt;
+
+ memset(&misc_pkt, 0, sizeof(misc_pkt));
+
+ misc_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ misc_pkt.header.opcode = MES_SCH_API_MISC;
+ misc_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+
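+ /* Translate the generic misc op into the matching MES API opcode and payload */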
+ switch (input->op) {
+ case MES_MISC_OP_READ_REG:
+ misc_pkt.opcode = MESAPI_MISC__READ_REG;
+ misc_pkt.read_reg.reg_offset = input->read_reg.reg_offset;
+ misc_pkt.read_reg.buffer_addr = input->read_reg.buffer_addr;
+ break;
+ case MES_MISC_OP_WRITE_REG:
+ misc_pkt.opcode = MESAPI_MISC__WRITE_REG;
+ misc_pkt.write_reg.reg_offset = input->write_reg.reg_offset;
+ misc_pkt.write_reg.reg_value = input->write_reg.reg_value;
+ break;
+ case MES_MISC_OP_WRM_REG_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = 0;
+ break;
+ case MES_MISC_OP_WRM_REG_WR_WAIT:
+ misc_pkt.opcode = MESAPI_MISC__WAIT_REG_MEM;
+ misc_pkt.wait_reg_mem.op = WRM_OPERATION__WR_WAIT_WR_REG;
+ misc_pkt.wait_reg_mem.reference = input->wrm_reg.ref;
+ misc_pkt.wait_reg_mem.mask = input->wrm_reg.mask;
+ misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
+ misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
+ break;
+ default:
+ DRM_ERROR("unsupported misc op (%d) \n", input->op);
+ return -EINVAL;
+ }
return mes_v11_0_submit_pkt_and_poll_completion(mes,
- &mes_status_pkt, sizeof(mes_status_pkt));
+ &misc_pkt, sizeof(misc_pkt),
+ offsetof(union MESAPI__MISC, api_status));
}
static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
@@ -299,7 +349,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
mes_set_hw_res_pkt.aggregated_doorbells[i] =
- mes->agreegated_doorbells[i];
+ mes->aggregated_doorbells[i];
for (i = 0; i < 5; i++) {
mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
@@ -312,14 +362,65 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_reset = 1;
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
-
- mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
- mes->ring.fence_drv.gpu_addr;
- mes_set_hw_res_pkt.api_status.api_completion_fence_value =
- ++mes->ring.fence_drv.sync_seq;
+ mes_set_hw_res_pkt.oversubscription_timer = 50;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
- &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
+}
+
+static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
+{
+ struct amdgpu_device *adev = mes->adev;
+ uint32_t data;
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
+ data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
+ CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
+ data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
+ CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
+ data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
+ CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
+ data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
+ CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
+
+ data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
+ data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
+ data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
+ CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
+ data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
+
+ data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
+ WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
}
static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
@@ -328,6 +429,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
.suspend_gang = mes_v11_0_suspend_gang,
.resume_gang = mes_v11_0_resume_gang,
+ .misc_op = mes_v11_0_misc_op,
};
static int mes_v11_0_init_microcode(struct amdgpu_device *adev,
@@ -858,6 +960,18 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev,
mes_v11_0_queue_init_register(ring);
}
+ /* get MES scheduler/KIQ versions */
+ mutex_lock(&adev->srbm_mutex);
+ soc21_grbm_select(adev, 3, pipe, 0, 0);
+
+ if (pipe == AMDGPU_MES_SCHED_PIPE)
+ adev->mes.sched_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+ else if (pipe == AMDGPU_MES_KIQ_PIPE && adev->enable_mes_kiq)
+ adev->mes.kiq_version = RREG32_SOC15(GC, 0, regCP_MES_GP3_LO);
+
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+
return 0;
}
@@ -1108,6 +1222,8 @@ static int mes_v11_0_hw_init(void *handle)
if (r)
goto failure;
+ mes_v11_0_init_aggregated_doorbell(&adev->mes);
+
r = mes_v11_0_query_sched_status(&adev->mes);
if (r) {
DRM_ERROR("MES is busy\n");
@@ -1120,6 +1236,7 @@ static int mes_v11_0_hw_init(void *handle)
* with MES enabled.
*/
adev->gfx.kiq.ring.sched.ready = false;
+ adev->mes.ring.sched.ready = true;
return 0;
@@ -1130,6 +1247,9 @@ failure:
static int mes_v11_0_hw_fini(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mes.ring.sched.ready = false;
return 0;
}
@@ -1161,7 +1281,8 @@ static int mes_v11_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_mes_self_test(adev);
+ if (!amdgpu_in_reset(adev))
+ amdgpu_mes_self_test(adev);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 3f44a099c52a..3e51e773f92b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
+ tmp = mmVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
new file mode 100644
index 000000000000..e8058edc1d10
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "mmhub_v3_0_1.h"
+
+#include "mmhub/mmhub_3_0_1_offset.h"
+#include "mmhub/mmhub_3_0_1_sh_mask.h"
+#include "navi10_enum.h"
+
+#include "soc15_common.h"
+
+#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
+#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
+#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
+
+static const char *mmhub_client_ids_v3_0_1[][2] = {
+ [0][0] = "VMC",
+ [4][0] = "DCEDMC",
+ [5][0] = "DCEVGA",
+ [6][0] = "MP0",
+ [7][0] = "MP1",
+ [8][0] = "MPIO",
+ [16][0] = "HDP",
+ [17][0] = "LSDMA",
+ [18][0] = "JPEG",
+ [19][0] = "VCNU0",
+ [21][0] = "VSCH",
+ [22][0] = "VCNU1",
+ [23][0] = "VCN1",
+ [32+20][0] = "VCN0",
+ [2][1] = "DBGUNBIO",
+ [3][1] = "DCEDWB",
+ [4][1] = "DCEDMC",
+ [5][1] = "DCEVGA",
+ [6][1] = "MP0",
+ [7][1] = "MP1",
+ [8][1] = "MPIO",
+ [10][1] = "DBGU0",
+ [11][1] = "DBGU1",
+ [12][1] = "DBGU2",
+ [13][1] = "DBGU3",
+ [14][1] = "XDP",
+ [15][1] = "OSSSYS",
+ [16][1] = "HDP",
+ [17][1] = "LSDMA",
+ [18][1] = "JPEG",
+ [19][1] = "VCNU0",
+ [20][1] = "VCN0",
+ [21][1] = "VSCH",
+ [22][1] = "VCNU1",
+ [23][1] = "VCN1",
+};
+
+static uint32_t mmhub_v3_0_1_get_invalidate_req(unsigned int vmid,
+ uint32_t flush_type)
+{
+ u32 req = 0;
+
+ /* invalidate using legacy mode on vmid */
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ PER_VMID_INVALIDATE_REQ, 1 << vmid);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
+ CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
+
+ return req;
+}
+
+static void
+mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev,
+ uint32_t status)
+{
+ uint32_t cid, rw;
+ const char *mmhub_cid = NULL;
+
+ cid = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, CID);
+ rw = REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, RW);
+
+ dev_err(adev->dev,
+ "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
+ status);
+
+ switch (adev->ip_versions[MMHUB_HWIP][0]) {
+ case IP_VERSION(3, 0, 1):
+ mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw];
+ break;
+ default:
+ mmhub_cid = NULL;
+ break;
+ }
+
+ dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
+ mmhub_cid ? mmhub_cid : "unknown", cid);
+ dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
+ dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
+ dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
+ dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
+ REG_GET_FIELD(status,
+ MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
+ dev_err(adev->dev, "\t RW: 0x%x\n", rw);
+}
+
+static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev,
+ uint32_t vmid,
+ uint64_t page_table_base)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
+ hub->ctx_addr_distance * vmid,
+ lower_32_bits(page_table_base));
+
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
+ hub->ctx_addr_distance * vmid,
+ upper_32_bits(page_table_base));
+}
+
+static void mmhub_v3_0_1_init_gart_aperture_regs(struct amdgpu_device *adev)
+{
+ uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
+
+ mmhub_v3_0_1_setup_vm_pt_regs(adev, 0, pt_base);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
+ (u32)(adev->gmc.gart_start >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
+ (u32)(adev->gmc.gart_start >> 44));
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
+ (u32)(adev->gmc.gart_end >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
+ (u32)(adev->gmc.gart_end >> 44));
+}
+
+static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev)
+{
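+ /* Common RAS block registration; fall back to the default late_init and error callbacks when none were set above */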
+ uint64_t value;
+ uint32_t tmp;
+
+ /* Program the AGP BAR */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+
+ /*
+ * The new L1 policy blocks SRIOV guests from writing
+ * these regs; they will be programmed by the host instead,
+ * so skip programming these regs.
+ */
+ /* Program the system aperture low logical page number. */
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+ adev->gmc.vram_start >> 18);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ adev->gmc.vram_end >> 18);
+
+ /* Set default page address. */
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
+ adev->vm_manager.vram_base_offset;
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
+ (u32)(value >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
+ (u32)(value >> 44));
+
+ /* Program "protection fault". */
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
+ (u32)(adev->dummy_page_addr >> 12));
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
+ (u32)((u64)adev->dummy_page_addr >> 44));
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
+ ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
+}
+
+static void mmhub_v3_0_1_init_tlb_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 1);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ MTYPE, MTYPE_UC); /* UC, uncached */
+
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+}
+
+static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
+ ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
+ /* XXX for emulation, refer to closed source code. */
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
+ 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
+
+ tmp = regMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
+
+ tmp = regMMVM_L2_CNTL4_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
+
+ tmp = regMMVM_L2_CNTL5_DEFAULT;
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
+}
+
+static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
+}
+
+static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev)
+{
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
+ 0xFFFFFFFF);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
+ 0x0000000F);
+
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
+ WREG32_SOC15(MMHUB, 0,
+ regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
+
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
+ 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
+ 0);
+}
+
+static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ int i;
+ uint32_t tmp;
+
+ for (i = 0; i <= 14; i++) {
+ tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
+ adev->vm_manager.num_level);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+ 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ PAGE_TABLE_BLOCK_SIZE,
+ adev->vm_manager.block_size - 9);
+ /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
+ RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
+ !amdgpu_noretry);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
+ i * hub->ctx_distance, tmp);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
+ i * hub->ctx_addr_distance, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
+ i * hub->ctx_addr_distance,
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
+ i * hub->ctx_addr_distance,
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+ }
+
+ hub->vm_cntx_cntl = tmp;
+}
+
+static void mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ unsigned i;
+
+ for (i = 0; i < 18; ++i) {
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
+ i * hub->eng_addr_distance, 0xffffffff);
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
+ i * hub->eng_addr_distance, 0x1f);
+ }
+}
+
+static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev)
+{
+ /* GART Enable. */
+ mmhub_v3_0_1_init_gart_aperture_regs(adev);
+ mmhub_v3_0_1_init_system_aperture_regs(adev);
+ mmhub_v3_0_1_init_tlb_regs(adev);
+ mmhub_v3_0_1_init_cache_regs(adev);
+
+ mmhub_v3_0_1_enable_system_domain(adev);
+ mmhub_v3_0_1_disable_identity_aperture(adev);
+ mmhub_v3_0_1_setup_vmid_config(adev);
+ mmhub_v3_0_1_program_invalidation(adev);
+
+ return 0;
+}
+
+static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ u32 tmp;
+ u32 i;
+
+ /* Disable all tables */
+ for (i = 0; i < 16; i++)
+ WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
+ i * hub->ctx_distance, 0);
+
+ /* Setup TLB control */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
+ tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
+ ENABLE_ADVANCED_DRIVER_MODEL, 0);
+ WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
+
+ /* Setup L2 cache */
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
+}
+
+/**
+ * mmhub_v3_0_1_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+static void mmhub_v3_0_1_set_fault_enable_default(struct amdgpu_device *adev,
+ bool value)
+{
+ u32 tmp;
+
+ tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+ value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
+ if (!value) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_NO_RETRY_FAULT, 1);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
+ CRASH_ON_RETRY_FAULT, 1);
+ }
+ WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
+}
+
+static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = {
+ .print_l2_protection_fault_status = mmhub_v3_0_1_print_l2_protection_fault_status,
+ .get_invalidate_req = mmhub_v3_0_1_get_invalidate_req,
+};
+
+static void mmhub_v3_0_1_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+
+ hub->ctx0_ptb_addr_lo32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
+ hub->ctx0_ptb_addr_hi32 =
+ SOC15_REG_OFFSET(MMHUB, 0,
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
+ hub->vm_inv_eng0_sem =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
+ hub->vm_inv_eng0_req =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
+ hub->vm_inv_eng0_ack =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
+ hub->vm_context0_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
+ hub->vm_l2_pro_fault_status =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS);
+ hub->vm_l2_pro_fault_cntl =
+ SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
+
+ hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
+ hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
+ regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
+ hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
+ regMMVM_INVALIDATE_ENG0_REQ;
+ hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
+ regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
+
+ hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+ MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
+ hub->vmhub_funcs = &mmhub_v3_0_1_vmhub_funcs;
+}
+
+static u64 mmhub_v3_0_1_get_fb_location(struct amdgpu_device *adev)
+{
+ u64 base;
+
+ base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
+ base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
+ base <<= 24;
+
+ return base;
+}
+
+static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev)
+{
+ return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
+}
+
+static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ if (enable)
+ data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+ else
+ data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
+
+ if (def != data)
+ WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
+}
+
+static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
+{
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
+ mmhub_v3_0_1_update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ mmhub_v3_0_1_update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ return 0;
+}
+
+static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
+{
+ int data;
+
+ if (amdgpu_sriov_vf(adev))
+ *flags = 0;
+
+ data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
+
+ /* AMD_CG_SUPPORT_MC_MGCG */
+ if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_MGCG;
+
+ /* AMD_CG_SUPPORT_MC_LS */
+ if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_MC_LS;
+}
+
+const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = {
+ .init = mmhub_v3_0_1_init,
+ .get_fb_location = mmhub_v3_0_1_get_fb_location,
+ .get_mc_fb_offset = mmhub_v3_0_1_get_mc_fb_offset,
+ .gart_enable = mmhub_v3_0_1_gart_enable,
+ .set_fault_enable_default = mmhub_v3_0_1_set_fault_enable_default,
+ .gart_disable = mmhub_v3_0_1_gart_disable,
+ .set_clockgating = mmhub_v3_0_1_set_clockgating,
+ .get_clockgating = mmhub_v3_0_1_get_clockgating,
+ .setup_vm_pt_regs = mmhub_v3_0_1_setup_vm_pt_regs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
new file mode 100644
index 000000000000..4c1246735e7d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __MMHUB_V3_0_1_H__
+#define __MMHUB_V3_0_1_H__
+
+extern const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 6e0145b2b408..445cb06b9d26 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -295,9 +295,17 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev,
static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0];
+ unsigned int num_level, block_size;
uint32_t tmp;
int i;
+ num_level = adev->vm_manager.num_level;
+ block_size = adev->vm_manager.block_size;
+ if (adev->gmc.translate_further)
+ num_level -= 1;
+ else
+ block_size -= 9;
+
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i);
@@ -305,7 +313,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_DEPTH,
- adev->vm_manager.num_level);
+ num_level);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
@@ -323,7 +331,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid)
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
- adev->vm_manager.block_size - 9);
+ block_size);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index b81acf59870c..12906ba74462 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -283,8 +283,16 @@ flr_done:
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
- adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover_imp(adev, NULL);
+ adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 22c10b97ea81..e07757eea7ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -310,8 +310,16 @@ flr_done:
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
- adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
- amdgpu_device_gpu_recover_imp(adev, NULL);
+ adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 7b63d30b9b79..288c414babdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -522,8 +522,16 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
- if (amdgpu_device_should_recover_gpu(adev))
- amdgpu_device_gpu_recover_imp(adev, NULL);
+ if (amdgpu_device_should_recover_gpu(adev)) {
+ struct amdgpu_reset_context reset_context;
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.reset_req_dev = adev;
+ clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ amdgpu_device_gpu_recover(adev, NULL, &reset_context);
+ }
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 4b5396d3e60f..eec13cb5bf75 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * The ih_soft ring doesn't have any backing hardware registers;
+ * just update the wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 6cd1fb2eb913..b465baa26762 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = {
.ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
-const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = {
- .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
- .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
- .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
- .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
- .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
- .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
- .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
- .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
- .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
- .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
- .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
- .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
- .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
- .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
- .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
- .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK,
- .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK,
- .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK,
-};
-
static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;
@@ -547,7 +526,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
{
uint32_t reg, reg_data;
- if (adev->asic_type != CHIP_SIENNA_CICHLID)
+ if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0))
return;
reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL);
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
index 6074dd3a1ed8..a43b60acf7f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h
@@ -27,7 +27,6 @@
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg;
-extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc;
extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
index ed31d133f07a..982a89f841d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c
@@ -240,8 +240,11 @@ static void nbio_v4_3_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
def = data = RREG32_SOC15(NBIO, 0, regCPM_CONTROL);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
+ if (enable) {
data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
@@ -266,9 +269,12 @@ static void nbio_v4_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
{
uint32_t def, data;
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
/* TODO: need update in future */
def = data = RREG32_SOC15(NBIO, 0, regPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ if (enable) {
data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
} else {
data &= ~PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
@@ -344,6 +350,121 @@ static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev)
return rom_offset;
}
+#ifdef CONFIG_PCIEASPM
+static void nbio_v4_3_program_ltr(struct amdgpu_device *adev)
+{
+ uint32_t def, data;
+
+ def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
+ data = 0x35EB;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK;
+ data &= ~EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2);
+ data &= ~RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ if (adev->pdev->ltr_path)
+ data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ else
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+}
+#endif
+
+static void nbio_v4_3_program_aspm(struct amdgpu_device *adev)
+{
+#ifdef CONFIG_PCIEASPM
+ uint32_t def, data;
+
+ if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) &&
+ !(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0)))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
+ data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
+ data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7);
+ data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL7, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK;
+ data &= ~RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data &= ~RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
+ data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
+
+ WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2);
+ data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK |
+ PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK;
+ data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4);
+ data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL4, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL);
+ data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data);
+
+ nbio_v4_3_program_ltr(adev);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3);
+ data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT;
+ data |= 0x0010 << RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5);
+ data |= 0x0010 << RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL);
+ data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT;
+ data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT;
+ data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3);
+ data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK;
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL3, data);
+#endif
+}
+
const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.get_hdp_flush_req_offset = nbio_v4_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v4_3_get_hdp_flush_done_offset,
@@ -365,4 +486,5 @@ const struct amdgpu_nbio_funcs nbio_v4_3_funcs = {
.init_registers = nbio_v4_3_init_registers,
.remap_hdp_registers = nbio_v4_3_remap_hdp_registers,
.get_rom_offset = nbio_v4_3_get_rom_offset,
+ .program_aspm = nbio_v4_3_program_aspm,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 4531761dcf77..11848d1e238b 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
-const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = {
- .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
- .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
- .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
- .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
- .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
- .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
- .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
- .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
- .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
- .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
- .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
- .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
- .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
- .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
- .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
- .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK,
- .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK,
- .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK,
-};
-
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
uint32_t baco_cntl;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
index 7490022d79d4..f27c41728822 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -27,7 +27,6 @@
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
-extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald;
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
extern struct amdgpu_nbio_ras nbio_v7_4_ras;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
index cdc0c9779848..1dc95ef21da6 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c
@@ -58,11 +58,17 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan
bool use_doorbell, int doorbell_index,
int doorbell_size)
{
- u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_SDMA0_DOORBELL_RANGE);
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE);
u32 doorbell_range = RREG32_PCIE_PORT(reg);
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_CSDMA_DOORBELL_RANGE,
+ SIZE, doorbell_size);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
GDC0_BIF_SDMA0_DOORBELL_RANGE,
OFFSET, doorbell_index);
doorbell_range = REG_SET_FIELD(doorbell_range,
@@ -77,6 +83,26 @@ static void nbio_v7_7_sdma_doorbell_range(struct amdgpu_device *adev, int instan
WREG32_PCIE_PORT(reg, doorbell_range);
}
+static void nbio_v7_7_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell,
+ int doorbell_index, int instance)
+{
+ u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE);
+ u32 doorbell_range = RREG32_PCIE_PORT(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET,
+ doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8);
+ } else {
+ doorbell_range = REG_SET_FIELD(doorbell_range,
+ GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0);
+ }
+
+ WREG32_PCIE_PORT(reg, doorbell_range);
+}
+
static void nbio_v7_7_enable_doorbell_aperture(struct amdgpu_device *adev,
bool enable)
{
@@ -221,6 +247,81 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev)
}
+static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (enable) {
+ data |= (BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ } else {
+ data &= ~(BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK |
+ BIF0_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL, data);
+}
+
+static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+ return;
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (enable)
+ data |= BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+ else
+ data &= ~BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK;
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2, data);
+
+ def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1);
+ if (enable) {
+ data |= (BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(BIF0_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK |
+ BIF0_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_SOC15(NBIO, 0, regBIF0_PCIE_TX_POWER_CTRL_1, data);
+}
+
+static void nbio_v7_7_get_clockgating_state(struct amdgpu_device *adev,
+ u64 *flags)
+{
+ uint32_t data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL);
+ if (data & BIF0_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2);
+ if (data & BIF0_PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.get_hdp_flush_req_offset = nbio_v7_7_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v7_7_get_hdp_flush_done_offset,
@@ -232,9 +333,13 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = {
.mc_access_enable = nbio_v7_7_mc_access_enable,
.get_memsize = nbio_v7_7_get_memsize,
.sdma_doorbell_range = nbio_v7_7_sdma_doorbell_range,
+ .vcn_doorbell_range = nbio_v7_7_vcn_doorbell_range,
.enable_doorbell_aperture = nbio_v7_7_enable_doorbell_aperture,
.enable_doorbell_selfring_aperture = nbio_v7_7_enable_doorbell_selfring_aperture,
.ih_doorbell_range = nbio_v7_7_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_7_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_7_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_7_get_clockgating_state,
.ih_control = nbio_v7_7_ih_control,
.init_registers = nbio_v7_7_init_registers,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 236b7a61443a..22c775f39119 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -259,6 +259,8 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
GFX_FW_TYPE_CAP = 62, /* CAP_FW */
+ GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */
+ GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */
GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */
GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */
GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index a2588200ea58..0b2ac418e4ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
adev->psp.dtm_context.context.bin_desc.start_addr =
(uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
le32_to_cpu(ta_hdr->dtm.offset_bytes);
+
+ if (adev->apu_flags & AMD_APU_IS_RENOIR) {
+ adev->psp.securedisplay_context.context.bin_desc.fw_version =
+ le32_to_cpu(ta_hdr->securedisplay.fw_version);
+ adev->psp.securedisplay_context.context.bin_desc.size_bytes =
+ le32_to_cpu(ta_hdr->securedisplay.size_bytes);
+ adev->psp.securedisplay_context.context.bin_desc.start_addr =
+ (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr +
+ le32_to_cpu(ta_hdr->securedisplay.offset_bytes);
+ }
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 9e1ef81933ff..a75a286e1ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -20,6 +20,8 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
+#include <drm/drm_drv.h>
+#include <linux/vmalloc.h>
#include "amdgpu.h"
#include "amdgpu_psp.h"
#include "amdgpu_ucode.h"
@@ -39,7 +41,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -56,6 +60,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3
#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4
+/* memory training timeout define */
+#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000
+
static int psp_v13_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
@@ -105,6 +112,10 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
err = psp_init_sos_microcode(psp, chip_name);
if (err)
return err;
+ /* It's not necessary to load ras ta on Guest side */
+ err = psp_init_ta_microcode(psp, chip_name);
+ if (err)
+ return err;
break;
default:
BUG();
@@ -413,6 +424,159 @@ static void psp_v13_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
}
+static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg)
+{
+ int ret;
+ int i;
+ uint32_t data_32;
+ int max_wait;
+ struct amdgpu_device *adev = psp->adev;
+
+ data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, data_32);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, msg);
+
+ max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout;
+ for (i = 0; i < max_wait; i++) {
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret == 0)
+ break;
+ }
+ if (i < max_wait)
+ ret = 0;
+ else
+ ret = -ETIME;
+
+ dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n",
+ (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long",
+ (ret == 0) ? "succeeded" : "failed",
+ i, adev->usec_timeout/1000);
+ return ret;
+}
+
+
+static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
+{
+ struct psp_memory_training_context *ctx = &psp->mem_train_ctx;
+ uint32_t *pcache = (uint32_t *)ctx->sys_cache;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t p2c_header[4];
+ uint32_t sz;
+ void *buf;
+ int ret, idx;
+
+ if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) {
+ dev_dbg(adev->dev, "Memory training is not supported.\n");
+ return 0;
+ } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) {
+ dev_err(adev->dev, "Memory training initialization failure.\n");
+ return -EINVAL;
+ }
+
+ if (psp_v13_0_is_sos_alive(psp)) {
+ dev_dbg(adev->dev, "SOS is alive, skip memory training.\n");
+ return 0;
+ }
+
+ amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false);
+ dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n",
+ pcache[0], pcache[1], pcache[2], pcache[3],
+ p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]);
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ dev_dbg(adev->dev, "Short training depends on restore.\n");
+ ops |= PSP_MEM_TRAIN_RESTORE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_RESTORE) &&
+ pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE &&
+ pcache[3] == p2c_header[3])) {
+ dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n");
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ if ((ops & PSP_MEM_TRAIN_SAVE) &&
+ p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) {
+ dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n");
+ ops |= PSP_MEM_TRAIN_SEND_LONG_MSG;
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG;
+ ops |= PSP_MEM_TRAIN_SAVE;
+ }
+
+ dev_dbg(adev->dev, "Memory training ops:%x.\n", ops);
+
+ if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
+ /*
+ * Long training will encroach a certain amount on the bottom of VRAM;
+ * save the content from the bottom of VRAM to system memory
+ * before training, and restore it after training to avoid
+ * VRAM corruption.
+ */
+ sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
+
+ if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) {
+ dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n",
+ adev->gmc.visible_vram_size,
+ adev->mman.aper_base_kaddr);
+ return -EINVAL;
+ }
+
+ buf = vmalloc(sz);
+ if (!buf) {
+ dev_err(adev->dev, "failed to allocate system memory.\n");
+ return -ENOMEM;
+ }
+
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz);
+ ret = psp_v13_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN);
+ if (ret) {
+ DRM_ERROR("Send long training msg failed.\n");
+ vfree(buf);
+ drm_dev_exit(idx);
+ return ret;
+ }
+
+ memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
+ adev->hdp.funcs->flush_hdp(adev, NULL);
+ vfree(buf);
+ drm_dev_exit(idx);
+ } else {
+ vfree(buf);
+ return -ENODEV;
+ }
+ }
+
+ if (ops & PSP_MEM_TRAIN_SAVE) {
+ amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false);
+ }
+
+ if (ops & PSP_MEM_TRAIN_RESTORE) {
+ amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true);
+ }
+
+ if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) {
+ ret = psp_v13_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ?
+ PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN);
+ if (ret) {
+ dev_err(adev->dev, "send training msg failed.\n");
+ return ret;
+ }
+ }
+ ctx->training_cnt++;
+ return 0;
+}
+
static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr)
{
struct amdgpu_device *adev = psp->adev;
@@ -561,6 +725,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
.ring_destroy = psp_v13_0_ring_destroy,
.ring_get_wptr = psp_v13_0_ring_get_wptr,
.ring_set_wptr = psp_v13_0_ring_set_wptr,
+ .mem_training = psp_v13_0_memory_training,
.load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw,
.read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw,
.update_spirom = psp_v13_0_update_spirom,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
new file mode 100644
index 000000000000..321089dfa7db
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v13_0_4.h"
+
+#include "mp/mp_13_0_4_offset.h"
+#include "mp/mp_13_0_4_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_4_ta.bin");
+
+static int psp_v13_0_4_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const char *chip_name;
+ char ucode_prefix[30];
+ int err = 0;
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 4):
+ amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix));
+ chip_name = ucode_prefix;
+ break;
+ default:
+ BUG();
+ }
+
+ switch (adev->ip_versions[MP0_HWIP][0]) {
+ case IP_VERSION(13, 0, 4):
+ err = psp_init_toc_microcode(psp, chip_name);
+ if (err)
+ return err;
+ err = psp_init_ta_microcode(psp, chip_name);
+ if (err)
+ return err;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static bool psp_v13_0_4_is_sos_alive(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+ return sol_reg != 0x0;
+}
+
+static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ int ret;
+ int retry_loop;
+
+ for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+ /* Wait for the bootloader to signal that it is
+ ready by setting bit 31 of C2PMSG_35 to 1 */
+ ret = psp_wait_for(psp,
+ SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000,
+ 0x80000000,
+ false);
+
+ if (ret == 0)
+ return 0;
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_bootloader_load_component(struct psp_context *psp,
+ struct psp_bin_desc *bin_desc,
+ enum psp_bootloader_cmd bl_cmd)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check tOS sign of life register to confirm sys driver and sOS
+ * have already been loaded.
+ */
+ if (psp_v13_0_4_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy the PSP component binary to memory */
+ memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes);
+
+ /* Provide the component to the bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = bl_cmd;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+
+ return ret;
+}
+
+static int psp_v13_0_4_bootloader_load_kdb(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE);
+}
+
+static int psp_v13_0_4_bootloader_load_spl(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE);
+}
+
+static int psp_v13_0_4_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_soc_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_intf_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_dbg_drv(struct psp_context *psp)
+{
+ return psp_v13_0_4_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
+}
+
+static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * have already been loaded.
+ */
+ if (psp_v13_0_4_is_sos_alive(psp))
+ return 0;
+
+ ret = psp_v13_0_4_wait_for_bootloader(psp);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes);
+
+ /* Provide the PSP secure OS to bootloader */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be a handshake issue with hardware which needs a delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81),
+ 0, true);
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ring = &psp->km_ring;
+
+ ring->ring_type = ring_type;
+
+ /* allocate 4k Page of Local Frame Buffer memory for ring */
+ ring->ring_size = 0x1000;
+ ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+ if (ret) {
+ ring->ring_size = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_v13_0_4_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+ /* there might be a handshake issue with hardware which needs a delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x80000000, false);
+ } else {
+ /* Write the ring destroy command*/
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64,
+ GFX_CTRL_CMD_ID_DESTROY_RINGS);
+ /* there might be a handshake issue with hardware which needs a delay */
+ mdelay(20);
+ /* Wait for response flag (bit 31) */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ ret = psp_v13_0_4_ring_stop(psp, ring_type);
+ if (ret) {
+ DRM_ERROR("psp_v13_0_ring_stop_sriov failed!\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_102 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_103 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+ /* Write the ring initialization command to C2PMSG_101 */
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+ /* there might be a handshake issue with hardware which needs a delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_101 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101),
+ 0x80000000, 0x8000FFFF, false);
+
+ } else {
+ /* Wait for sOS ready for ring creation */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+ if (ret) {
+ DRM_ERROR("Failed to wait for trust OS ready for ring creation\n");
+ return ret;
+ }
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be a handshake issue with hardware which needs a delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+ }
+
+ return ret;
+}
+
+static int psp_v13_0_4_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v13_0_4_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static uint32_t psp_v13_0_4_ring_get_wptr(struct psp_context *psp)
+{
+ uint32_t data;
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev))
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102);
+ else
+ data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67);
+
+ return data;
+}
+
+static void psp_v13_0_4_ring_set_wptr(struct psp_context *psp, uint32_t value)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (amdgpu_sriov_vf(adev)) {
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, value);
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_101,
+ GFX_CTRL_CMD_ID_CONSUME_CMD);
+ } else
+ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, value);
+}
+
+static const struct psp_funcs psp_v13_0_4_funcs = {
+ .init_microcode = psp_v13_0_4_init_microcode,
+ .bootloader_load_kdb = psp_v13_0_4_bootloader_load_kdb,
+ .bootloader_load_spl = psp_v13_0_4_bootloader_load_spl,
+ .bootloader_load_sysdrv = psp_v13_0_4_bootloader_load_sysdrv,
+ .bootloader_load_soc_drv = psp_v13_0_4_bootloader_load_soc_drv,
+ .bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv,
+ .bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv,
+ .bootloader_load_sos = psp_v13_0_4_bootloader_load_sos,
+ .ring_init = psp_v13_0_4_ring_init,
+ .ring_create = psp_v13_0_4_ring_create,
+ .ring_stop = psp_v13_0_4_ring_stop,
+ .ring_destroy = psp_v13_0_4_ring_destroy,
+ .ring_get_wptr = psp_v13_0_4_ring_get_wptr,
+ .ring_set_wptr = psp_v13_0_4_ring_set_wptr,
+};
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v13_0_4_funcs;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
new file mode 100644
index 000000000000..8547b8d514d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V13_0_4_H__
+#define __PSP_V13_0_4_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v13_0_4_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 1f9021f896a1..a019ac92edb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -389,34 +389,67 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
DRM_DEBUG("Setting write pointer\n");
- if (ring->use_doorbell) {
- DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr << 2) == 0x%08x "
- "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
- /* XXX check if swapping is necessary on BE */
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
+
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ *wptr_saved = ring->wptr << 2;
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index,
+ ring->wptr << 2);
+ }
} else {
- DRM_DEBUG("Not using doorbell -- "
- "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
- "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
- upper_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "mmSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
+ ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 8cfaed55b192..0200cb3a31a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -57,6 +57,7 @@ static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
+static int sdma_v6_0_start(struct amdgpu_device *adev);
static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
@@ -245,34 +246,68 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ uint32_t *wptr_saved;
+ uint32_t *is_queue_unmap;
+ uint64_t aggregated_db_index;
+ uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
DRM_DEBUG("Setting write pointer\n");
- if (ring->use_doorbell) {
- DRM_DEBUG("Using doorbell -- "
- "wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
- ring->wptr_offs,
- lower_32_bits(ring->wptr << 2),
- upper_32_bits(ring->wptr << 2));
- /* XXX check if swapping is necessary on BE */
+
+ if (ring->is_mes_queue) {
+ wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
+ is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
+ sizeof(uint32_t));
+ aggregated_db_index =
+ amdgpu_mes_get_aggregated_doorbell_index(adev,
+ ring->hw_prio);
+
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
- DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
- ring->doorbell_index, ring->wptr << 2);
- WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ *wptr_saved = ring->wptr << 2;
+ if (*is_queue_unmap) {
+ WDOORBELL64(aggregated_db_index, ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+
+ if (*is_queue_unmap)
+ WDOORBELL64(aggregated_db_index,
+ ring->wptr << 2);
+ }
} else {
- DRM_DEBUG("Not using doorbell -- "
- "regSDMA%i_GFX_RB_WPTR == 0x%08x "
- "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- ring->me,
- lower_32_bits(ring->wptr << 2),
- ring->me,
- upper_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR),
- lower_32_bits(ring->wptr << 2));
- WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
- upper_32_bits(ring->wptr << 2));
+ if (ring->use_doorbell) {
+ DRM_DEBUG("Using doorbell -- "
+ "wptr_offs == 0x%08x "
+ "lower_32_bits(ring->wptr) << 2 == 0x%08x "
+ "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ ring->wptr_offs,
+ lower_32_bits(ring->wptr << 2),
+ upper_32_bits(ring->wptr << 2));
+ /* XXX check if swapping is necessary on BE */
+ atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+ ring->wptr << 2);
+ DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
+ ring->doorbell_index, ring->wptr << 2);
+ WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ } else {
+ DRM_DEBUG("Not using doorbell -- "
+ "regSDMA%i_GFX_RB_WPTR == 0x%08x "
+ "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
+ ring->me,
+ lower_32_bits(ring->wptr << 2),
+ ring->me,
+ upper_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
+ ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
+ }
}
}
@@ -771,32 +806,54 @@ static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
static int sdma_v6_0_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 grbm_soft_reset;
u32 tmp;
int i;
+ sdma_v6_0_gfx_stop(adev);
+
for (i = 0; i < adev->sdma.num_instances; i++) {
- grbm_soft_reset = REG_SET_FIELD(0,
- GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
- 1);
- grbm_soft_reset <<= i;
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
+ tmp |= SDMA0_FREEZE__FREEZE_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
+ tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK;
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp);
- tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
+
+ udelay(100);
+
+ tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
- udelay(50);
+ udelay(100);
- tmp &= ~grbm_soft_reset;
- WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
+ WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
- udelay(50);
+ udelay(100);
}
- return 0;
+ return sdma_v6_0_start(adev);
+}
+
+static bool sdma_v6_0_check_soft_reset(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_ring *ring;
+ int i, r;
+ long tmo = msecs_to_jiffies(1000);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ r = amdgpu_ring_test_ib(ring, tmo);
+ if (r)
+ return true;
+ }
+
+ return false;
}
/**
@@ -830,7 +887,6 @@ static int sdma_v6_0_start(struct amdgpu_device *adev)
msleep(1000);
}
- sdma_v6_0_soft_reset(adev);
/* unhalt the MEs */
sdma_v6_0_enable(adev, true);
/* enable sdma ring preemption */
@@ -1526,6 +1582,7 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
.is_idle = sdma_v6_0_is_idle,
.wait_for_idle = sdma_v6_0_wait_for_idle,
.soft_reset = sdma_v6_0_soft_reset,
+ .check_soft_reset = sdma_v6_0_check_soft_reset,
.set_clockgating_state = sdma_v6_0_set_clockgating_state,
.set_powergating_state = sdma_v6_0_set_powergating_state,
.get_clockgating_state = sdma_v6_0_get_clockgating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 9e18a2b22607..55284b24f113 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -80,6 +80,7 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
switch (adev->ip_versions[UVD_HWIP][0]) {
case IP_VERSION(4, 0, 0):
+ case IP_VERSION(4, 0, 2):
if (encode)
*codecs = &vcn_4_0_0_video_codecs_encode;
else
@@ -310,6 +311,7 @@ static enum amd_reset_method
soc21_asic_reset_method(struct amdgpu_device *adev)
{
if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
return amdgpu_reset_method;
@@ -319,7 +321,10 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 0):
+ case IP_VERSION(13, 0, 7):
return AMD_RESET_METHOD_MODE1;
+ case IP_VERSION(13, 0, 4):
+ return AMD_RESET_METHOD_MODE2;
default:
if (amdgpu_dpm_is_baco_supported(adev))
return AMD_RESET_METHOD_BACO;
@@ -341,6 +346,10 @@ static int soc21_asic_reset(struct amdgpu_device *adev)
dev_info(adev->dev, "BACO reset\n");
ret = amdgpu_dpm_baco_reset(adev);
break;
+ case AMD_RESET_METHOD_MODE2:
+ dev_info(adev->dev, "MODE2 reset\n");
+ ret = amdgpu_dpm_mode2_reset(adev);
+ break;
default:
dev_info(adev->dev, "MODE1 reset\n");
ret = amdgpu_device_mode1_reset(adev);
@@ -379,11 +388,12 @@ static void soc21_pcie_gen3_enable(struct amdgpu_device *adev)
static void soc21_program_aspm(struct amdgpu_device *adev)
{
-
- if (amdgpu_aspm == 0)
+ if (!amdgpu_device_should_use_aspm(adev))
return;
- /* todo */
+ if (!(adev->flags & AMD_IS_APU) &&
+ (adev->nbio.funcs->program_aspm))
+ adev->nbio.funcs->program_aspm(adev);
}
static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev,
@@ -409,7 +419,13 @@ static uint32_t soc21_get_rev_id(struct amdgpu_device *adev)
static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
- return true;
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ return false;
+ default:
+ return true;
+ }
}
static bool soc21_need_reset_on_init(struct amdgpu_device *adev)
@@ -478,6 +494,20 @@ static void soc21_pre_asic_init(struct amdgpu_device *adev)
{
}
+static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev,
+ bool enter)
+{
+ if (enter)
+ amdgpu_gfx_rlc_enter_safe_mode(adev);
+ else
+ amdgpu_gfx_rlc_exit_safe_mode(adev);
+
+ if (adev->gfx.funcs->update_perfmon_mgcg)
+ adev->gfx.funcs->update_perfmon_mgcg(adev, !enter);
+
+ return 0;
+}
+
static const struct amdgpu_asic_funcs soc21_asic_funcs =
{
.read_disabled_bios = &soc21_read_disabled_bios,
@@ -497,6 +527,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs =
.supports_baco = &amdgpu_dpm_is_baco_supported,
.pre_asic_init = &soc21_pre_asic_init,
.query_video_codecs = &soc21_query_video_codecs,
+ .update_umd_stable_pstate = &soc21_update_umd_stable_pstate,
};
static int soc21_common_early_init(void *handle)
@@ -530,8 +561,10 @@ static int soc21_common_early_init(void *handle)
case IP_VERSION(11, 0, 0):
adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
+#if 0
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_GFX_3D_CGLS |
+#endif
AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_GFX_FGCG |
@@ -555,8 +588,13 @@ static int soc21_common_early_init(void *handle)
adev->cg_flags =
AMD_CG_SUPPORT_GFX_CGCG |
AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
AMD_CG_SUPPORT_VCN_MGCG |
- AMD_CG_SUPPORT_JPEG_MGCG;
+ AMD_CG_SUPPORT_JPEG_MGCG |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_HDP_SD;
adev->pg_flags =
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG |
@@ -566,8 +604,27 @@ static int soc21_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x10;
break;
case IP_VERSION(11, 0, 1):
- adev->cg_flags = 0;
- adev->pg_flags = 0;
+ adev->cg_flags =
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_FGCG |
+ AMD_CG_SUPPORT_REPEATER_FGCG |
+ AMD_CG_SUPPORT_GFX_PERF_CLK |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ATHUB_MGCG |
+ AMD_CG_SUPPORT_ATHUB_LS |
+ AMD_CG_SUPPORT_IH_CG |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_JPEG_MGCG;
+ adev->pg_flags =
+ AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_JPEG;
adev->external_rev_id = adev->rev_id + 0x1;
break;
default:
@@ -661,6 +718,8 @@ static int soc21_common_set_clockgating_state(void *handle,
switch (adev->ip_versions[NBIO_HWIP][0]) {
case IP_VERSION(4, 3, 0):
+ case IP_VERSION(4, 3, 1):
+ case IP_VERSION(7, 7, 0):
adev->nbio.funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
adev->nbio.funcs->update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 606892dbea1c..bf7524f16b66 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -119,6 +119,24 @@ static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device
*error_count += 1;
umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ if (ras->umc_ecc.record_ce_addr_supported) {
+ uint64_t err_addr, soc_pa;
+ uint32_t channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_ceumc_addr;
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
}
}
@@ -251,7 +269,9 @@ static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev
static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
- unsigned long *error_count)
+ unsigned long *error_count,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
{
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
uint32_t ecc_err_cnt, ecc_err_cnt_addr;
@@ -295,6 +315,31 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
*error_count += 1;
umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+
+ {
+ uint64_t err_addr, soc_pa;
+ uint32_t mc_umc_addrt0;
+ uint32_t channel_index;
+
+ mc_umc_addrt0 =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ /* translate umc channel address to soc pa, 3 parts are included */
+ soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
+ ADDR_OF_256B_BLOCK(channel_index) |
+ OFFSET_IN_256B_BLOCK(err_addr);
+
+ /* The umc channel bits are not original values, they are hashed */
+ SET_CHANNEL_HASH(channel_index, soc_pa);
+
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n", soc_pa);
+ }
}
}
@@ -395,7 +440,8 @@ static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
ch_inst);
umc_v6_7_query_correctable_error_count(adev,
umc_reg_offset,
- &(err_data->ce_count));
+ &(err_data->ce_count),
+ ch_inst, umc_inst);
umc_v6_7_querry_uncorrectable_error_count(adev,
umc_reg_offset,
&(err_data->ue_count));
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
new file mode 100644
index 000000000000..36a2053f2e8b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -0,0 +1,357 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "umc_v8_10.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_umc.h"
+#include "amdgpu.h"
+#include "umc/umc_8_10_0_offset.h"
+#include "umc/umc_8_10_0_sh_mask.h"
+
+#define UMC_8_NODE_DIST 0x800000
+#define UMC_8_INST_DIST 0x4000
+
+struct channelnum_map_colbit {
+ uint32_t channel_num;
+ uint32_t col_bit;
+};
+
+const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = {
+ {24, 13},
+ {20, 13},
+ {16, 12},
+ {14, 12},
+ {12, 12},
+ {10, 12},
+ {6, 11},
+};
+
+const uint32_t
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+ {{16, 18}, {17, 19}},
+ {{15, 11}, {3, 7}},
+ {{1, 5}, {13, 9}},
+ {{23, 21}, {22, 20}},
+ {{0, 4}, {12, 8}},
+ {{14, 10}, {2, 6}}
+ };
+
+static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
+ uint32_t node_inst,
+ uint32_t umc_inst,
+ uint32_t ch_inst)
+{
+ return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst +
+ UMC_8_NODE_DIST * node_inst;
+}
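get_umc_v8_10_reg_offset() composes the per-channel register offset from three strides: the per-channel offset (adev->umc.channel_offs), the per-UMC-instance distance and the per-node distance. A standalone sketch of the same arithmetic, assuming the per-channel stride is UMC_V8_10_PER_CHANNEL_OFFSET (0x400):

#include <stdio.h>
#include <stdint.h>

#define UMC_8_NODE_DIST		0x800000
#define UMC_8_INST_DIST		0x4000
#define PER_CHANNEL_OFFS	0x400	/* assumed value of adev->umc.channel_offs */

static uint32_t umc_reg_offset(uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst)
{
	return PER_CHANNEL_OFFS * ch_inst + UMC_8_INST_DIST * umc_inst +
	       UMC_8_NODE_DIST * node_inst;
}

int main(void)
{
	/* node 1, umc instance 1, channel 1 -> 0x804400 */
	printf("0x%x\n", umc_reg_offset(1, 1, 1));
	return 0;
}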
+
+static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_addr;
+
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+
+ /* clear error count */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
+ UMC_V8_10_CE_CNT_INIT);
+}
+
+static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)
+{
+ uint32_t node_inst = 0;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_10_reg_offset(adev,
+ node_inst,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_10_clear_error_count_per_channel(adev,
+ umc_reg_offset);
+ }
+}
+
+static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint32_t ecc_err_cnt, ecc_err_cnt_addr;
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ /* UMC 8_10 registers */
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
+ *error_count +=
+ (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) -
+ UMC_V8_10_CE_CNT_INIT);
+
+ /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ *error_count += 1;
+}
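The correctable count is the GeccErrCnt field minus the counter's initial value (UMC_V8_10_CE_CNT_INIT, defined later in umc_v8_10.h), plus one more if MCUMC_STATUS reports a valid CECC event. A minimal sketch of that bookkeeping with assumed register read-back values:

#include <stdio.h>
#include <stdint.h>

#define CE_CNT_MAX		0xffff
#define CE_INT_THRESHOLD	0xffff
#define CE_CNT_INIT		(CE_CNT_MAX - CE_INT_THRESHOLD)	/* 0 with these values */

int main(void)
{
	uint32_t gecc_err_cnt = 3;		/* assumed GeccErrCnt field read-back */
	int status_val = 1, status_cecc = 1;	/* assumed MCUMC_STATUS fields */
	unsigned long error_count = 0;

	error_count += gecc_err_cnt - CE_CNT_INIT;
	if (status_val == 1 && status_cecc == 1)
		error_count += 1;

	printf("%lu correctable errors\n", error_count);	/* 4 */
	return 0;
}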
+
+static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset,
+ unsigned long *error_count)
+{
+ uint64_t mc_umc_status;
+ uint32_t mc_umc_status_addr;
+
+ mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+
+ /* Check the MCUMC_STATUS. */
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+ *error_count += 1;
+}
+
+static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ uint32_t node_inst = 0;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_10_reg_offset(adev,
+ node_inst,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_10_query_correctable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ce_count));
+ umc_v8_10_query_uncorrectable_error_count(adev,
+ umc_reg_offset,
+ &(err_data->ue_count));
+ }
+
+ umc_v8_10_clear_error_count(adev);
+}
+
+static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num)
+{
+ uint32_t t = 0;
+
+ for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++)
+ if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num)
+ return umc_v8_10_channelnum_map_colbit_table[t].col_bit;
+
+ /* Failed to get col_bit. */
+ return U32_MAX;
+}
+
+/*
+ * Mapping normal address to soc physical address in swizzle mode.
+ */
+static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev,
+ uint32_t channel_idx,
+ uint64_t na, uint64_t *soc_pa)
+{
+ uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
+ uint32_t col_bit = umc_v8_10_get_col_bit(channel_num);
+ uint64_t tmp_addr;
+
+ if (col_bit == U32_MAX)
+ return -1;
+
+ tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx);
+ *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) |
+ SWIZZLE_MODE_ADDR_MID(na, col_bit) |
+ SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) |
+ SWIZZLE_MODE_ADDR_LSB(na);
+
+ return 0;
+}
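The swizzle translation rebuilds the SoC physical address from four pieces of the normal address and the channel index, using the SWIZZLE_MODE_* macros added in umc_v8_10.h later in this patch. A standalone sketch with assumed inputs (16 channels, hence col_bit 12 per the mapping table above):

#include <stdio.h>
#include <stdint.h>

/* copies of the macros introduced in umc_v8_10.h */
#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \
		((((na) >> 10) * (ch_num) + (ch_idx)) << 10)
#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \
		(((addr) >> ((col_bit) + 2)) << ((col_bit) + 2))
#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit))
#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \
		((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8)
#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF)

int main(void)
{
	uint64_t na = 0x12345678ULL;	/* assumed normal (channel) address */
	uint32_t channel_num = 16, channel_idx = 5, col_bit = 12;
	uint64_t tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx);
	uint64_t soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) |
			  SWIZZLE_MODE_ADDR_MID(na, col_bit) |
			  SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) |
			  SWIZZLE_MODE_ADDR_LSB(na);

	printf("soc_pa = 0x%llx\n", (unsigned long long)soc_pa);	/* 0x123456578 */
	return 0;
}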
+
+static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t umc_reg_offset,
+ uint32_t node_inst,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint64_t mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr;
+ uint32_t channel_index;
+
+ mc_umc_status_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
+ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+
+ if (mc_umc_status == 0)
+ return;
+
+ if (!err_data->err_addr) {
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+ return;
+ }
+
+ channel_index =
+ adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
+ adev->umc.channel_inst_num +
+ umc_inst * adev->umc.channel_inst_num +
+ ch_inst];
+
+ /* calculate error address if ue/ce error is detected */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
+ (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+ uint32_t addr_lsb;
+ uint64_t mc_umc_addrt0;
+
+ mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
+ err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ /* the lowest lsb bits should be ignored */
+ addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
+
+ err_addr &= ~((0x1ULL << addr_lsb) - 1);
+
+ /* we only save ue error information currently, ce is skipped */
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
+ uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
+ uint64_t na_err_addr, retired_page_addr;
+ uint32_t col = 0;
+ int ret = 0;
+
+ /* loop for all possibilities of [C6 C5] in normal address. */
+ for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
+ na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
+
+ /* Mapping normal error address to retired soc physical address. */
+ ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
+ na_err_addr, &retired_page_addr);
+ if (ret) {
+ dev_err(adev->dev, "Failed to map pa from umc na.\n");
+ break;
+ }
+ dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
+ retired_page_addr);
+ amdgpu_umc_fill_error_record(err_data, na_err_addr,
+ retired_page_addr, channel_index, umc_inst);
+ }
+ }
+ }
+
+ /* clear umc status */
+ WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+}
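For an uncorrectable error all four [C6 C5] combinations of the normal address are retired, since those two column bits cannot be recovered from the reported address. A minimal sketch of the candidate generation, with an assumed error address:

#include <stdio.h>
#include <stdint.h>

#define NA_C5_BIT	14	/* UMC_V8_10_NA_C5_BIT */
#define NA_COL_COMBOS	4	/* UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM */

int main(void)
{
	uint64_t err_addr = 0x12345600ULL;	/* assumed reported normal address */
	uint64_t base = err_addr & ~(0x3ULL << NA_C5_BIT);
	uint32_t col;

	for (col = 0; col < NA_COL_COMBOS; col++)
		printf("candidate NA: 0x%llx\n",
		       (unsigned long long)(base | ((uint64_t)col << NA_C5_BIT)));
	return 0;
}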
+
+static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t node_inst = 0;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_10_reg_offset(adev,
+ node_inst,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_10_query_error_address(adev,
+ err_data,
+ umc_reg_offset,
+ node_inst,
+ ch_inst,
+ umc_inst);
+ }
+}
+
+static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
+ uint32_t umc_reg_offset)
+{
+ uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+ uint32_t ecc_err_cnt_addr;
+
+ ecc_err_cnt_sel_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
+ ecc_err_cnt_addr =
+ SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
+
+ ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
+
+ /* set ce error interrupt type to APIC based interrupt */
+ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
+ GeccErrInt, 0x1);
+ WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
+ /* set error count to initial value */
+ WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT);
+}
+
+static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
+{
+ uint32_t node_inst = 0;
+ uint32_t umc_inst = 0;
+ uint32_t ch_inst = 0;
+ uint32_t umc_reg_offset = 0;
+
+ LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+ umc_reg_offset = get_umc_v8_10_reg_offset(adev,
+ node_inst,
+ umc_inst,
+ ch_inst);
+
+ umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
+ }
+}
+
+const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
+ .query_ras_error_count = umc_v8_10_query_ras_error_count,
+ .query_ras_error_address = umc_v8_10_query_ras_error_address,
+};
+
+struct amdgpu_umc_ras umc_v8_10_ras = {
+ .ras_block = {
+ .hw_ops = &umc_v8_10_ras_hw_ops,
+ },
+ .err_cnt_init = umc_v8_10_err_cnt_init,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
new file mode 100644
index 000000000000..849ede88e111
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __UMC_V8_10_H__
+#define __UMC_V8_10_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* number of umc channel instances with memory map register access */
+#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2
+/* number of umc instances with memory map register access */
+#define UMC_V8_10_UMC_INSTANCE_NUM 2
+
+/* Total channel instances for all umc nodes */
+#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \
+ (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num)
+
+/* UMC register per channel offset */
+#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400
+
+/* EccErrCnt max value */
+#define UMC_V8_10_CE_CNT_MAX 0xffff
+/* umc ce interrupt threshold */
+#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff
+/* umc ce count initial value */
+#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD)
+
+#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4
+
+/* The C5 bit in NA address */
+#define UMC_V8_10_NA_C5_BIT 14
+
+/* Map to swizzle mode address */
+#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \
+ ((((na) >> 10) * (ch_num) + (ch_idx)) << 10)
+#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \
+ (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2))
+#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit))
+#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \
+ ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8)
+#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF)
+
+extern struct amdgpu_umc_ras umc_v8_10_ras;
+extern const uint32_t
+ umc_v8_10_channel_idx_tbl[]
+ [UMC_V8_10_UMC_INSTANCE_NUM]
+ [UMC_V8_10_CHANNEL_INSTANCE_NUM];
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 9119e966ffff..fb2d74f30448 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -25,11 +25,11 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
-#include "vcn_sw_ring.h"
#include "vcn/vcn_4_0_0_offset.h"
#include "vcn/vcn_4_0_0_sh_mask.h"
@@ -45,15 +45,15 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
-bool unifiedQ_enabled = false;
+#define RDECODE_MSG_CREATE 0x00000000
+#define RDECODE_MESSAGE_CREATE 0x00000001
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
};
-static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_set_powergating_state(void *handle,
enum amd_powergating_state state);
@@ -71,36 +71,15 @@ static int vcn_v4_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (unifiedQ_enabled) {
- adev->vcn.num_vcn_inst = 1;
- adev->vcn.num_enc_rings = 1;
- } else {
- adev->vcn.num_enc_rings = 2;
- }
-
- if (!unifiedQ_enabled)
- vcn_v4_0_set_dec_ring_funcs(adev);
+ /* re-use enc ring as unified ring */
+ adev->vcn.num_enc_rings = 1;
- vcn_v4_0_set_enc_ring_funcs(adev);
+ vcn_v4_0_set_unified_ring_funcs(adev);
vcn_v4_0_set_irq_funcs(adev);
return 0;
}
-static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
-{
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- const struct common_firmware_header *hdr;
-
- hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
- adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
- DRM_INFO("PSP loading VCN firmware\n");
- }
-}
-
/**
* vcn_v4_0_sw_init - sw init for VCN block
*
@@ -111,17 +90,14 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
static int vcn_v4_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
- int i, j, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i, r;
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
- if (unifiedQ_enabled)
- amdgpu_vcn_setup_unified_queue_ucode(adev);
- else
- amdgpu_vcn_setup_ucode(adev);
+ amdgpu_vcn_setup_ucode(adev);
r = amdgpu_vcn_resume(adev);
if (r)
@@ -129,81 +105,40 @@ static int vcn_v4_0_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+
if (adev->vcn.harvest_config & (1 << i))
continue;
- /* VCN DEC TRAP */
+
+ atomic_set(&adev->vcn.inst[i].sched_score, 0);
+
+ /* VCN UNIFIED TRAP */
r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
- VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq);
+ VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
if (r)
return r;
- atomic_set(&adev->vcn.inst[i].sched_score, 0);
- if (!unifiedQ_enabled) {
- ring = &adev->vcn.inst[i].ring_dec;
- ring->use_doorbell = true;
-
- /* VCN4 doorbell layout
- * 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg)
- * 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue)
- * 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue)
- * 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved)
- * 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue)
- */
-
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1)
- + 5 + 8 * i;
-
- sprintf(ring->name, "vcn_dec_%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
- AMDGPU_RING_PRIO_DEFAULT,
- &adev->vcn.inst[i].sched_score);
- if (r)
- return r;
- }
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
- /* VCN ENC TRAP */
- r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
- j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
- if (r)
- return r;
-
- ring = &adev->vcn.inst[i].ring_enc[j];
- ring->use_doorbell = true;
-
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
-
- if (unifiedQ_enabled) {
- sprintf(ring->name, "vcn_unified%d", i);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
- AMDGPU_RING_PRIO_DEFAULT, NULL);
- } else {
- enum amdgpu_ring_priority_level hw_prio;
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->use_doorbell = true;
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
- hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
- sprintf(ring->name, "vcn_enc_%d.%d", i, j);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
- hw_prio, &adev->vcn.inst[i].sched_score);
- }
- if (r)
- return r;
- }
+ sprintf(ring->name, "vcn_unified_%d", i);
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->present_flag_0 = 0;
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
+ AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
+ if (r)
+ return r;
- if (unifiedQ_enabled) {
- fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
- fw_shared->sq.is_enabled = 1;
- }
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
if (amdgpu_vcnfw_log)
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
}
- if (!unifiedQ_enabled) {
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
- adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
- }
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
+
return 0;
}
@@ -219,20 +154,20 @@ static int vcn_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r, idx;
- if (drm_dev_enter(&adev->ddev, &idx)) {
- for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ if (drm_dev_enter(adev_to_drm(adev), &idx)) {
+ for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
- if (adev->vcn.harvest_config & (1 << i))
- continue;
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->present_flag_0 = 0;
- fw_shared->sq.is_enabled = 0;
- }
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = 0;
+ fw_shared->sq.is_enabled = 0;
+ }
- drm_dev_exit(idx);
- }
+ drm_dev_exit(idx);
+ }
r = amdgpu_vcn_suspend(adev);
if (r)
@@ -254,15 +189,13 @@ static int vcn_v4_0_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
- int i, j, r;
+ int i, r;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- if (unifiedQ_enabled)
- ring = &adev->vcn.inst[i].ring_enc[0];
- else
- ring = &adev->vcn.inst[i].ring_dec;
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
@@ -270,13 +203,6 @@ static int vcn_v4_0_hw_init(void *handle)
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
-
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
- ring = &adev->vcn.inst[i].ring_enc[j];
- r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
- }
}
done:
@@ -464,7 +390,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
-
}
if (!indirect)
@@ -888,7 +813,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
- int i;
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
@@ -974,74 +898,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
- if (unifiedQ_enabled) {
- ring = &adev->vcn.inst[inst_idx].ring_enc[0];
- fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
- } else
- ring = &adev->vcn.inst[inst_idx].ring_dec;
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
- WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL,
- ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
- VCN_RB4_DB_CTRL__EN_MASK);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
- /* program the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4,
- upper_32_bits(ring->gpu_addr));
-
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));
-
- /* reseting ring, fw should not check RB ring */
tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);
- /* Initialize the ring buffer's read and write pointers */
- tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4);
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp);
- ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
+ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
+ WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
+ ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
- WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0);
-
- if (unifiedQ_enabled)
- fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET;
-
- for (i = 0; i < adev->vcn.num_enc_rings; i++) {
- ring = &adev->vcn.inst[inst_idx].ring_enc[i];
-
- if (i) {
- ring = &adev->vcn.inst[inst_idx].ring_enc[1];
-
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4);
- tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2);
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp);
- ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);
-
- WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL,
- ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
- VCN_RB2_DB_CTRL__EN_MASK);
- } else {
- ring = &adev->vcn.inst[inst_idx].ring_enc[0];
-
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
- tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
- WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
- ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
+ WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
- WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
- ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
- }
- }
return 0;
}
@@ -1064,6 +946,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
amdgpu_dpm_enable_uvd(adev, true);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
continue;
@@ -1081,15 +965,15 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
/* enable VCPU clock */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
+ UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
/* disable master interrupt */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
/* enable LMI MC and UMC channels */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET);
tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
@@ -1099,10 +983,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
/* setup regUVD_LMI_CTRL */
tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL);
WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
/* setup regUVD_MPC_CNTL */
tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL);
@@ -1112,37 +996,37 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
/* setup UVD_MPC_SET_MUXA0 */
WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0,
- ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
/* setup UVD_MPC_SET_MUXB0 */
WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0,
- ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
- (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
- (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
/* setup UVD_MPC_SET_MUX */
WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX,
- ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
- (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
- (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
vcn_v4_0_mc_resume(adev, i);
/* VCN global tiling registers */
WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
+ adev->gfx.config.gb_addr_config);
/* unblock VCPU register access */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0,
- ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
+ ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
/* release VCPU reset to boot */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
for (j = 0; j < 10; ++j) {
uint32_t status;
@@ -1157,6 +1041,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode==1) {
+ r = -1;
if (status & 2) {
r = 0;
break;
@@ -1166,13 +1051,13 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
if (status & 2)
break;
- dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
- WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
- UVD_VCPU_CNTL__BLK_RST_MASK,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
+ WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
+ UVD_VCPU_CNTL__BLK_RST_MASK,
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
mdelay(10);
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0,
- ~UVD_VCPU_CNTL__BLK_RST_MASK);
+ ~UVD_VCPU_CNTL__BLK_RST_MASK);
mdelay(10);
r = -1;
@@ -1180,78 +1065,43 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
}
if (r) {
- dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i);
+ dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
return r;
}
/* enable master interrupt */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN),
- UVD_MASTINT_EN__VCPU_EN_MASK,
- ~UVD_MASTINT_EN__VCPU_EN_MASK);
+ UVD_MASTINT_EN__VCPU_EN_MASK,
+ ~UVD_MASTINT_EN__VCPU_EN_MASK);
/* clear the busy bit of VCN_STATUS */
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- if (unifiedQ_enabled) {
- ring = &adev->vcn.inst[i].ring_enc[0];
- fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
- } else {
- ring = &adev->vcn.inst[i].ring_dec;
-
- WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL,
- ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
- VCN_RB4_DB_CTRL__EN_MASK);
-
- /* program the RB_BASE for ring buffer */
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4,
- upper_32_bits(ring->gpu_addr));
-
- WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));
-
- /* resetting ring, fw should not check RB ring */
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
+ ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
+ VCN_RB1_DB_CTRL__EN_MASK);
- /* Initialize the ring buffer's read and write pointers */
- tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp);
- ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
- tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
- tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
- WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
+ WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
+ WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
- ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
- }
- ring = &adev->vcn.inst[i].ring_enc[0];
- WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
- ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
- VCN_RB1_DB_CTRL__EN_MASK);
tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
- if (unifiedQ_enabled)
- fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
- else {
- ring = &adev->vcn.inst[i].ring_enc[1];
- WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL,
- ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
- VCN_RB2_DB_CTRL__EN_MASK);
- tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2);
- WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp);
- ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr);
- WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4);
- }
+
+ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+ tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
+ WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
+ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
}
return 0;
@@ -1265,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
*
* Stop VCN block with dpg mode
*/
-static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
+static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
uint32_t tmp;
@@ -1277,19 +1127,12 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);
- tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);
- SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
-
- tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
- SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF);
-
SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
/* disable dynamic power gating mode */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
- return 0;
}
/**
@@ -1301,12 +1144,16 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
*/
static int vcn_v4_0_stop(struct amdgpu_device *adev)
{
+ volatile struct amdgpu_vcn4_fw_shared *fw_shared;
uint32_t tmp;
int i, r = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
+
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- r = vcn_v4_0_stop_dpg_mode(adev, i);
+ vcn_v4_0_stop_dpg_mode(adev, i);
continue;
}
@@ -1414,8 +1261,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
/* unpause dpg, no need to wait */
reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
- SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
- UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
}
@@ -1424,165 +1269,199 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
}
/**
- * vcn_v4_0_dec_ring_get_rptr - get read pointer
+ * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
*
* @ring: amdgpu_ring pointer
*
- * Returns the current hardware read pointer
+ * Returns the current hardware unified read pointer
*/
-static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4);
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
+ return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
}
/**
- * vcn_v4_0_dec_ring_get_wptr - get write pointer
+ * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
*
* @ring: amdgpu_ring pointer
*
- * Returns the current hardware write pointer
+ * Returns the current hardware unified write pointer
*/
-static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
+
if (ring->use_doorbell)
return *ring->wptr_cpu_addr;
else
- return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4);
+ return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
}
/**
- * vcn_v4_0_dec_ring_set_wptr - set write pointer
+ * vcn_v4_0_unified_ring_set_wptr - set enc write pointer
*
* @ring: amdgpu_ring pointer
*
- * Commits the write pointer to the hardware
+ * Commits the enc write pointer to the hardware
*/
-static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
- WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2,
- lower_32_bits(ring->wptr));
- }
+ if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
+ DRM_ERROR("wrong ring id is identified in %s", __func__);
if (ring->use_doorbell) {
*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
} else {
- WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr));
+ WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
}
}
-static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_DEC,
- .align_mask = 0x3f,
- .nop = VCN_DEC_SW_CMD_NO_OP,
- .vmhub = AMDGPU_MMHUB_0,
- .get_rptr = vcn_v4_0_dec_ring_get_rptr,
- .get_wptr = vcn_v4_0_dec_ring_get_wptr,
- .set_wptr = vcn_v4_0_dec_ring_set_wptr,
- .emit_frame_size =
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
- VCN_SW_RING_EMIT_FRAME_SIZE,
- .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
- .emit_ib = vcn_dec_sw_ring_emit_ib,
- .emit_fence = vcn_dec_sw_ring_emit_fence,
- .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
- .test_ib = amdgpu_vcn_dec_sw_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
- .insert_end = vcn_dec_sw_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_dec_sw_ring_emit_wreg,
- .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
-/**
- * vcn_v4_0_enc_ring_get_rptr - get enc read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware enc read pointer
- */
-static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)
{
- struct amdgpu_device *adev = ring->adev;
+ struct drm_gpu_scheduler **scheds;
- if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
- return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
- else
- return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2);
+ /* The create msg must be in the first IB submitted */
+ if (atomic_read(&p->entity->fence_seq))
+ return -EINVAL;
+
+ scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
+ [AMDGPU_RING_PRIO_0].sched;
+ drm_sched_entity_modify_sched(p->entity, scheds, 1);
+ return 0;
}
-/**
- * vcn_v4_0_enc_ring_get_wptr - get enc write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware enc write pointer
- */
-static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
{
- struct amdgpu_device *adev = ring->adev;
+ struct ttm_operation_ctx ctx = { false, false };
+ struct amdgpu_bo_va_mapping *map;
+ uint32_t *msg, num_buffers;
+ struct amdgpu_bo *bo;
+ uint64_t start, end;
+ unsigned int i;
+ void *ptr;
+ int r;
- if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
- if (ring->use_doorbell)
- return *ring->wptr_cpu_addr;
- else
- return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
- } else {
- if (ring->use_doorbell)
- return *ring->wptr_cpu_addr;
- else
- return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2);
+ addr &= AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
+ if (r) {
+ DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
+ return r;
}
+
+ start = map->start * AMDGPU_GPU_PAGE_SIZE;
+ end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
+ if (addr & 0x7) {
+ DRM_ERROR("VCN messages must be 8 byte aligned!\n");
+ return -EINVAL;
+ }
+
+ bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r) {
+ DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
+ return r;
+ }
+
+ r = amdgpu_bo_kmap(bo, &ptr);
+ if (r) {
+ DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
+ return r;
+ }
+
+ msg = ptr + addr - start;
+
+ /* Check length */
+ if (msg[1] > end - addr) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (msg[3] != RDECODE_MSG_CREATE)
+ goto out;
+
+ num_buffers = msg[2];
+ for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
+ uint32_t offset, size, *create;
+
+ if (msg[0] != RDECODE_MESSAGE_CREATE)
+ continue;
+
+ offset = msg[1];
+ size = msg[2];
+
+ if (offset + size > end) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ create = ptr + addr + offset - start;
+
+		/* H264, HEVC and VP9 can run on any instance */
+ if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
+ continue;
+
+ r = vcn_v4_0_limit_sched(p);
+ if (r)
+ goto out;
+ }
+
+out:
+ amdgpu_bo_kunmap(bo);
+ return r;
}
-/**
- * vcn_v4_0_enc_ring_set_wptr - set enc write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the enc write pointer to the hardware
- */
-static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
+
+static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
{
- struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
+ struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+ uint32_t val;
+ int r = 0;
- if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
- if (ring->use_doorbell) {
- *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
- } else {
- WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
- }
- } else {
- if (ring->use_doorbell) {
- *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
- } else {
- WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr));
- }
+ /* The first instance can decode anything */
+ if (!ring->me)
+ return r;
+
+ /* unified queue ib header has 8 double words. */
+ if (ib->length_dw < 8)
+ return r;
+
+ val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
+
+ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
+
+ if (decode_buffer->valid_buf_flag & 0x1)
+ r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo);
}
+ return r;
}
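patch_cs_in_place() only inspects IBs whose unified-queue header marks them as decode work: dword 6 carries the engine type, and the decode buffer starts at dword 10, with the 64-bit message address split across hi/lo dwords. A standalone sketch of that parsing against a fabricated IB (the field ordering inside the decode buffer is an assumption for illustration, not the driver's struct definition):

#include <stdio.h>
#include <stdint.h>

#define VCN_ENGINE_TYPE_DECODE	0x00000003

int main(void)
{
	/* fabricated unified-queue IB: dword 6 = engine type,
	 * dwords 10.. = decode buffer (flags, msg addr hi, msg addr lo assumed order) */
	uint32_t ib[16] = { 0 };

	ib[6]  = VCN_ENGINE_TYPE_DECODE;
	ib[10] = 0x1;		/* valid_buf_flag: message buffer present */
	ib[11] = 0x0000abcd;	/* msg_buffer_address_hi */
	ib[12] = 0x12345000;	/* msg_buffer_address_lo */

	if (ib[6] == VCN_ENGINE_TYPE_DECODE && (ib[10] & 0x1)) {
		uint64_t msg_addr = ((uint64_t)ib[11] << 32) | ib[12];
		printf("decode msg buffer at 0x%llx\n", (unsigned long long)msg_addr);
	}
	return 0;
}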
-static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
+static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
.nop = VCN_ENC_CMD_NO_OP,
.vmhub = AMDGPU_MMHUB_0,
- .get_rptr = vcn_v4_0_enc_ring_get_rptr,
- .get_wptr = vcn_v4_0_enc_ring_get_wptr,
- .set_wptr = vcn_v4_0_enc_ring_set_wptr,
+ .get_rptr = vcn_v4_0_unified_ring_get_rptr,
+ .get_wptr = vcn_v4_0_unified_ring_get_wptr,
+ .set_wptr = vcn_v4_0_unified_ring_set_wptr,
+ .patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
@@ -1594,7 +1473,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
.emit_fence = vcn_v2_0_enc_ring_emit_fence,
.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
.test_ring = amdgpu_vcn_enc_ring_test_ring,
- .test_ib = amdgpu_vcn_enc_ring_test_ib,
+ .test_ib = amdgpu_vcn_unified_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.insert_end = vcn_v2_0_enc_ring_insert_end,
.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -1606,13 +1485,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
};
/**
- * vcn_v4_0_set_dec_ring_funcs - set dec ring functions
+ * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
*
* @adev: amdgpu_device pointer
*
- * Set decode ring functions
+ * Set unified ring functions
*/
-static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
{
int i;
@@ -1620,32 +1499,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs;
- adev->vcn.inst[i].ring_dec.me = i;
- DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i);
- }
-}
+ adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs;
+ adev->vcn.inst[i].ring_enc[0].me = i;
-/**
- * vcn_v4_0_set_enc_ring_funcs - set enc ring functions
- *
- * @adev: amdgpu_device pointer
- *
- * Set encode ring functions
- */
-static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev)
-{
- int i, j;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
-
- for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
- adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs;
- adev->vcn.inst[i].ring_enc[j].me = i;
- }
- DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
+ DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
}
}
@@ -1798,18 +1655,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
DRM_DEBUG("IH: VCN TRAP\n");
switch (entry->src_id) {
- case VCN_4_0__SRCID__UVD_TRAP:
- if (!unifiedQ_enabled) {
- amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
- break;
- }
- break;
case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
break;
- case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY:
- amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index cdd599a08125..03b7066471f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 3b4eb8285943..2022ffbb8dba 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev,
u32 wptr, tmp;
struct amdgpu_ih_regs *ih_regs;
- if (ih == &adev->irq.ih) {
+ if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) {
/* Only ring0 supports writeback. On other rings fall back
* to register-based code with overflow checking below.
+ * ih_soft ring doesn't have any backing hardware registers,
+ * update wptr and return.
*/
wptr = le32_to_cpu(*ih->wptr_cpu);
@@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev,
{
struct amdgpu_ih_regs *ih_regs;
+ if (ih == &adev->irq.ih_soft)
+ return;
+
if (ih->use_doorbell) {
/* XXX check if swapping is necessary on BE */
*ih->rptr_cpu = ih->rptr;