Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Kconfig | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ObjectID.h | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 783
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 254
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 71
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 144
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 37
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 171
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 296
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 84
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 86
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 570
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 281
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 88
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 879
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 430
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 36
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h | 92
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 74
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 351
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 215
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 88
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 239
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 247
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 74
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 330
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 242
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 490
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 36
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 484
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 128
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 34
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 65
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 307
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 128
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 44
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 97
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 409
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 109
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 43
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 235
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 71
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 354
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1837
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 101
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 74
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 119
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 44
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 27
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_ih.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 66
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 129
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 898
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 678
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 32
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h | 29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 51
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 88
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 87
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 186
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 70
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 263
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 248
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h | 31
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 595
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v11_0.h | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 82
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 99
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 177
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dma.c | 47
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.c | 36
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_enums.h | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_ih.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sid.h | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 130
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15_common.h | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15d.h | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 154
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 1047
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 45
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 40
146 files changed, 11639 insertions, 6383 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index e8af1f5e8a79..9221e5489069 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -42,3 +42,4 @@ config DRM_AMDGPU_GART_DEBUGFS
source "drivers/gpu/drm/amd/acp/Kconfig"
source "drivers/gpu/drm/amd/display/Kconfig"
+source "drivers/gpu/drm/amd/amdkfd/Kconfig"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index bfd332c95b61..138cb787d27e 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -35,7 +35,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH) \
-I$(FULL_AMD_DISPLAY_PATH)/include \
-I$(FULL_AMD_DISPLAY_PATH)/dc \
- -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm
+ -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
+ -I$(FULL_AMD_PATH)/amdkfd
amdgpu-y := amdgpu_drv.o
@@ -51,8 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
- amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \
- amdgpu_ids.o
+ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
+ amdgpu_gmc.o amdgpu_xgmi.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
- vega20_reg_init.o
+ vega20_reg_init.o nbio_v7_4.o
# add DF block
amdgpu-y += \
@@ -73,7 +74,7 @@ amdgpu-y += \
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
- gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o
+ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o
# add IH block
amdgpu-y += \
@@ -88,7 +89,8 @@ amdgpu-y += \
amdgpu-y += \
amdgpu_psp.o \
psp_v3_1.o \
- psp_v10_0.o
+ psp_v10_0.o \
+ psp_v11_0.o
# add SMC block
amdgpu-y += \
@@ -108,6 +110,7 @@ amdgpu-y += \
# add async DMA block
amdgpu-y += \
+ amdgpu_sdma.o \
sdma_v2_4.o \
sdma_v3_0.o \
sdma_v4_0.o
@@ -134,6 +137,9 @@ amdgpu-y += \
amdgpu-y += amdgpu_amdkfd.o
ifneq ($(CONFIG_HSA_AMD),)
+AMDKFD_PATH := ../amdkfd
+include $(FULL_AMD_PATH)/amdkfd/Makefile
+amdgpu-y += $(AMDKFD_FILES)
amdgpu-y += \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
index 06192698bd96..5b393622f592 100644
--- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h
+++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h
@@ -136,6 +136,7 @@
#define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE 0x02
#define GENERIC_OBJECT_ID_MXM_OPM 0x03
#define GENERIC_OBJECT_ID_STEREO_PIN 0x04 //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin
+#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05
/****************************************************/
/* Graphics Object ENUM ID Definition */
@@ -714,6 +715,13 @@
GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT)
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
+
+#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
+ GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
+ GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
/****************************************************/
/* Object Cap definition - Shared with BIOS */
/****************************************************/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7dcbac8af9a7..d0102cfc8efb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -28,6 +28,8 @@
#ifndef __AMDGPU_H__
#define __AMDGPU_H__
+#include "amdgpu_ctx.h"
+
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/list.h>
@@ -69,10 +71,32 @@
#include "amdgpu_vcn.h"
#include "amdgpu_mn.h"
#include "amdgpu_gmc.h"
+#include "amdgpu_gfx.h"
+#include "amdgpu_sdma.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
+#include "amdgpu_job.h"
+#include "amdgpu_bo_list.h"
+#include "amdgpu_gem.h"
+
+#define MAX_GPU_INSTANCE 16
+
+struct amdgpu_gpu_instance
+{
+ struct amdgpu_device *adev;
+ int mgpu_fan_enabled;
+};
+
+struct amdgpu_mgpu_info
+{
+ struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE];
+ struct mutex mutex;
+ uint32_t num_gpu;
+ uint32_t num_dgpu;
+ uint32_t num_apu;
+};
/*
* Modules parameters.
@@ -105,11 +129,8 @@ extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
extern int amdgpu_dc;
-extern int amdgpu_dc_log;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
-extern int amdgpu_no_evict;
-extern int amdgpu_direct_gma_size;
extern uint amdgpu_pcie_gen_cap;
extern uint amdgpu_pcie_lane_cap;
extern uint amdgpu_cg_mask;
@@ -130,6 +151,7 @@ extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
+extern struct amdgpu_mgpu_info mgpu_info;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@@ -149,9 +171,6 @@ extern int amdgpu_cik_support;
#define AMDGPUFB_CONN_LIMIT 4
#define AMDGPU_BIOS_NUM_SCRATCH 16
-/* max number of IP instances */
-#define AMDGPU_MAX_SDMA_INSTANCES 2
-
/* hard reset data */
#define AMDGPU_ASIC_RESET_DATA 0x39d5e86b
@@ -172,13 +191,6 @@ extern int amdgpu_cik_support;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
-/* GFX current status */
-#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
-#define AMDGPU_GFX_SAFE_MODE 0x00000001L
-#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
-#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
-#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
-
/* max cursor sizes (in pixels) */
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
@@ -206,13 +218,6 @@ enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_LAST
};
-enum amdgpu_sdma_irq {
- AMDGPU_SDMA_IRQ_TRAP0 = 0,
- AMDGPU_SDMA_IRQ_TRAP1,
-
- AMDGPU_SDMA_IRQ_LAST
-};
-
enum amdgpu_thermal_irq {
AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0,
AMDGPU_THERMAL_IRQ_HIGH_TO_LOW,
@@ -225,6 +230,10 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_LAST
};
+#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
int amdgpu_device_ip_set_clockgating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state);
@@ -272,70 +281,6 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
const struct amdgpu_ip_block_version *ip_block_version);
-/* provided by hw blocks that can move/clear data. e.g., gfx or sdma */
-struct amdgpu_buffer_funcs {
- /* maximum bytes in a single operation */
- uint32_t copy_max_bytes;
-
- /* number of dw to reserve per operation */
- unsigned copy_num_dw;
-
- /* used for buffer migration */
- void (*emit_copy_buffer)(struct amdgpu_ib *ib,
- /* src addr in bytes */
- uint64_t src_offset,
- /* dst addr in bytes */
- uint64_t dst_offset,
- /* number of byte to transfer */
- uint32_t byte_count);
-
- /* maximum bytes in a single operation */
- uint32_t fill_max_bytes;
-
- /* number of dw to reserve per operation */
- unsigned fill_num_dw;
-
- /* used for buffer clearing */
- void (*emit_fill_buffer)(struct amdgpu_ib *ib,
- /* value to write to memory */
- uint32_t src_data,
- /* dst addr in bytes */
- uint64_t dst_offset,
- /* number of byte to fill */
- uint32_t byte_count);
-};
-
-/* provided by hw blocks that can write ptes, e.g., sdma */
-struct amdgpu_vm_pte_funcs {
- /* number of dw to reserve per operation */
- unsigned copy_pte_num_dw;
-
- /* copy pte entries from GART */
- void (*copy_pte)(struct amdgpu_ib *ib,
- uint64_t pe, uint64_t src,
- unsigned count);
-
- /* write pte one entry at a time with addr mapping */
- void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
- uint64_t value, unsigned count,
- uint32_t incr);
- /* for linear pte/pde updates without addr mapping */
- void (*set_pte_pde)(struct amdgpu_ib *ib,
- uint64_t pe,
- uint64_t addr, unsigned count,
- uint32_t incr, uint64_t flags);
-};
-
-/* provided by the ih block */
-struct amdgpu_ih_funcs {
- /* ring read/write ptr handling, called from interrupt context */
- u32 (*get_wptr)(struct amdgpu_device *adev);
- bool (*prescreen_iv)(struct amdgpu_device *adev);
- void (*decode_iv)(struct amdgpu_device *adev,
- struct amdgpu_iv_entry *entry);
- void (*set_rptr)(struct amdgpu_device *adev);
-};
-
/*
* BIOS.
*/
@@ -361,34 +306,6 @@ struct amdgpu_clock {
uint32_t max_pixel_clock;
};
-/*
- * GEM.
- */
-
-#define AMDGPU_GEM_DOMAIN_MAX 0x3
-#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
-
-void amdgpu_gem_object_free(struct drm_gem_object *obj);
-int amdgpu_gem_object_open(struct drm_gem_object *obj,
- struct drm_file *file_priv);
-void amdgpu_gem_object_close(struct drm_gem_object *obj,
- struct drm_file *file_priv);
-unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
- struct dma_buf_attachment *attach,
- struct sg_table *sg);
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
- struct drm_gem_object *gobj,
- int flags);
-struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
- struct dma_buf *dma_buf);
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
-void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
-void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
/* sub-allocation manager, it has to be protected by another lock.
* By conception this is an helper for other part of the driver
* like the indirect buffer or semaphore, which both have their
@@ -438,22 +355,6 @@ struct amdgpu_sa_bo {
struct dma_fence *fence;
};
-/*
- * GEM objects.
- */
-void amdgpu_gem_force_release(struct amdgpu_device *adev);
-int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
- int alignment, u32 initial_domain,
- u64 flags, enum ttm_bo_type type,
- struct reservation_object *resv,
- struct drm_gem_object **obj);
-
-int amdgpu_mode_dumb_create(struct drm_file *file_priv,
- struct drm_device *dev,
- struct drm_mode_create_dumb *args);
-int amdgpu_mode_dumb_mmap(struct drm_file *filp,
- struct drm_device *dev,
- uint32_t handle, uint64_t *offset_p);
int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void);
@@ -526,16 +427,25 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,
/*
- * Other graphics doorbells can be allocated here: from 0x8c to 0xef
+ * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
* Graphics voltage island aperture 1
- * default non-graphics QWORD index is 0xF0 - 0xFF inclusive
+ * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
*/
- /* sDMA engines */
- AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
- AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
- AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
- AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
+ /* sDMA engines reserved from 0xe0 to 0xef */
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
+ AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9,
+
+ /* For vega10 sriov, the sdma doorbell must be fixed as follows
+ * to keep the same setting as the host driver, or conflicts
+ * will happen.
+ */
+ AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0,
+ AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
+ AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2,
+ AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
/* Interrupt handler */
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
@@ -600,95 +510,6 @@ struct amdgpu_ib {
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_job **job, struct amdgpu_vm *vm);
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- struct amdgpu_job **job);
-
-void amdgpu_job_free_resources(struct amdgpu_job *job);
-void amdgpu_job_free(struct amdgpu_job *job);
-int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
- struct drm_sched_entity *entity, void *owner,
- struct dma_fence **f);
-
-/*
- * Queue manager
- */
-struct amdgpu_queue_mapper {
- int hw_ip;
- struct mutex lock;
- /* protected by lock */
- struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
-};
-
-struct amdgpu_queue_mgr {
- struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
-};
-
-int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
- struct amdgpu_queue_mgr *mgr);
-int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
- struct amdgpu_queue_mgr *mgr);
-int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
- struct amdgpu_queue_mgr *mgr,
- u32 hw_ip, u32 instance, u32 ring,
- struct amdgpu_ring **out_ring);
-
-/*
- * context related structures
- */
-
-struct amdgpu_ctx_ring {
- uint64_t sequence;
- struct dma_fence **fences;
- struct drm_sched_entity entity;
-};
-
-struct amdgpu_ctx {
- struct kref refcount;
- struct amdgpu_device *adev;
- struct amdgpu_queue_mgr queue_mgr;
- unsigned reset_counter;
- unsigned reset_counter_query;
- uint32_t vram_lost_counter;
- spinlock_t ring_lock;
- struct dma_fence **fences;
- struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
- bool preamble_presented;
- enum drm_sched_priority init_priority;
- enum drm_sched_priority override_priority;
- struct mutex lock;
- atomic_t guilty;
-};
-
-struct amdgpu_ctx_mgr {
- struct amdgpu_device *adev;
- struct mutex lock;
- /* protected by lock */
- struct idr ctx_handles;
-};
-
-struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
-int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
-
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct dma_fence *fence, uint64_t *seq);
-struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
- struct amdgpu_ring *ring, uint64_t seq);
-void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
- enum drm_sched_priority priority);
-
-int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
-
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
-
-void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
-void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
-
-
/*
* file private structure
*/
@@ -702,293 +523,6 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr ctx_mgr;
};
-/*
- * residency list
- */
-struct amdgpu_bo_list_entry {
- struct amdgpu_bo *robj;
- struct ttm_validate_buffer tv;
- struct amdgpu_bo_va *bo_va;
- uint32_t priority;
- struct page **user_pages;
- int user_invalidated;
-};
-
-struct amdgpu_bo_list {
- struct mutex lock;
- struct rcu_head rhead;
- struct kref refcount;
- struct amdgpu_bo *gds_obj;
- struct amdgpu_bo *gws_obj;
- struct amdgpu_bo *oa_obj;
- unsigned first_userptr;
- unsigned num_entries;
- struct amdgpu_bo_list_entry *array;
-};
-
-struct amdgpu_bo_list *
-amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
-void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
- struct list_head *validated);
-void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
-void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
-
-/*
- * GFX stuff
- */
-#include "clearstate_defs.h"
-
-struct amdgpu_rlc_funcs {
- void (*enter_safe_mode)(struct amdgpu_device *adev);
- void (*exit_safe_mode)(struct amdgpu_device *adev);
-};
-
-struct amdgpu_rlc {
- /* for power gating */
- struct amdgpu_bo *save_restore_obj;
- uint64_t save_restore_gpu_addr;
- volatile uint32_t *sr_ptr;
- const u32 *reg_list;
- u32 reg_list_size;
- /* for clear state */
- struct amdgpu_bo *clear_state_obj;
- uint64_t clear_state_gpu_addr;
- volatile uint32_t *cs_ptr;
- const struct cs_section_def *cs_data;
- u32 clear_state_size;
- /* for cp tables */
- struct amdgpu_bo *cp_table_obj;
- uint64_t cp_table_gpu_addr;
- volatile uint32_t *cp_table_ptr;
- u32 cp_table_size;
-
- /* safe mode for updating CG/PG state */
- bool in_safe_mode;
- const struct amdgpu_rlc_funcs *funcs;
-
- /* for firmware data */
- u32 save_and_restore_offset;
- u32 clear_state_descriptor_offset;
- u32 avail_scratch_ram_locations;
- u32 reg_restore_list_size;
- u32 reg_list_format_start;
- u32 reg_list_format_separate_start;
- u32 starting_offsets_start;
- u32 reg_list_format_size_bytes;
- u32 reg_list_size_bytes;
- u32 reg_list_format_direct_reg_list_length;
- u32 save_restore_list_cntl_size_bytes;
- u32 save_restore_list_gpm_size_bytes;
- u32 save_restore_list_srm_size_bytes;
-
- u32 *register_list_format;
- u32 *register_restore;
- u8 *save_restore_list_cntl;
- u8 *save_restore_list_gpm;
- u8 *save_restore_list_srm;
-
- bool is_rlc_v2_1;
-};
-
-#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
-
-struct amdgpu_mec {
- struct amdgpu_bo *hpd_eop_obj;
- u64 hpd_eop_gpu_addr;
- struct amdgpu_bo *mec_fw_obj;
- u64 mec_fw_gpu_addr;
- u32 num_mec;
- u32 num_pipe_per_mec;
- u32 num_queue_per_pipe;
- void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
-
- /* These are the resources for which amdgpu takes ownership */
- DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-};
-
-struct amdgpu_kiq {
- u64 eop_gpu_addr;
- struct amdgpu_bo *eop_obj;
- spinlock_t ring_lock;
- struct amdgpu_ring ring;
- struct amdgpu_irq_src irq;
-};
-
-/*
- * GPU scratch registers structures, functions & helpers
- */
-struct amdgpu_scratch {
- unsigned num_reg;
- uint32_t reg_base;
- uint32_t free_mask;
-};
-
-/*
- * GFX configurations
- */
-#define AMDGPU_GFX_MAX_SE 4
-#define AMDGPU_GFX_MAX_SH_PER_SE 2
-
-struct amdgpu_rb_config {
- uint32_t rb_backend_disable;
- uint32_t user_rb_backend_disable;
- uint32_t raster_config;
- uint32_t raster_config_1;
-};
-
-struct gb_addr_config {
- uint16_t pipe_interleave_size;
- uint8_t num_pipes;
- uint8_t max_compress_frags;
- uint8_t num_banks;
- uint8_t num_se;
- uint8_t num_rb_per_se;
-};
-
-struct amdgpu_gfx_config {
- unsigned max_shader_engines;
- unsigned max_tile_pipes;
- unsigned max_cu_per_sh;
- unsigned max_sh_per_se;
- unsigned max_backends_per_se;
- unsigned max_texture_channel_caches;
- unsigned max_gprs;
- unsigned max_gs_threads;
- unsigned max_hw_contexts;
- unsigned sc_prim_fifo_size_frontend;
- unsigned sc_prim_fifo_size_backend;
- unsigned sc_hiz_tile_fifo_size;
- unsigned sc_earlyz_tile_fifo_size;
-
- unsigned num_tile_pipes;
- unsigned backend_enable_mask;
- unsigned mem_max_burst_length_bytes;
- unsigned mem_row_size_in_kb;
- unsigned shader_engine_tile_size;
- unsigned num_gpus;
- unsigned multi_gpu_tile_size;
- unsigned mc_arb_ramcfg;
- unsigned gb_addr_config;
- unsigned num_rbs;
- unsigned gs_vgt_table_depth;
- unsigned gs_prim_buffer_depth;
-
- uint32_t tile_mode_array[32];
- uint32_t macrotile_mode_array[16];
-
- struct gb_addr_config gb_addr_config_fields;
- struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
-
- /* gfx configure feature */
- uint32_t double_offchip_lds_buf;
- /* cached value of DB_DEBUG2 */
- uint32_t db_debug2;
-};
-
-struct amdgpu_cu_info {
- uint32_t simd_per_cu;
- uint32_t max_waves_per_simd;
- uint32_t wave_front_size;
- uint32_t max_scratch_slots_per_cu;
- uint32_t lds_size;
-
- /* total active CU number */
- uint32_t number;
- uint32_t ao_cu_mask;
- uint32_t ao_cu_bitmap[4][4];
- uint32_t bitmap[4][4];
-};
-
-struct amdgpu_gfx_funcs {
- /* get the gpu clock counter */
- uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
- void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
- void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields);
- void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
- void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
- void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
-};
-
-struct amdgpu_ngg_buf {
- struct amdgpu_bo *bo;
- uint64_t gpu_addr;
- uint32_t size;
- uint32_t bo_size;
-};
-
-enum {
- NGG_PRIM = 0,
- NGG_POS,
- NGG_CNTL,
- NGG_PARAM,
- NGG_BUF_MAX
-};
-
-struct amdgpu_ngg {
- struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
- uint32_t gds_reserve_addr;
- uint32_t gds_reserve_size;
- bool init;
-};
-
-struct amdgpu_gfx {
- struct mutex gpu_clock_mutex;
- struct amdgpu_gfx_config config;
- struct amdgpu_rlc rlc;
- struct amdgpu_mec mec;
- struct amdgpu_kiq kiq;
- struct amdgpu_scratch scratch;
- const struct firmware *me_fw; /* ME firmware */
- uint32_t me_fw_version;
- const struct firmware *pfp_fw; /* PFP firmware */
- uint32_t pfp_fw_version;
- const struct firmware *ce_fw; /* CE firmware */
- uint32_t ce_fw_version;
- const struct firmware *rlc_fw; /* RLC firmware */
- uint32_t rlc_fw_version;
- const struct firmware *mec_fw; /* MEC firmware */
- uint32_t mec_fw_version;
- const struct firmware *mec2_fw; /* MEC2 firmware */
- uint32_t mec2_fw_version;
- uint32_t me_feature_version;
- uint32_t ce_feature_version;
- uint32_t pfp_feature_version;
- uint32_t rlc_feature_version;
- uint32_t rlc_srlc_fw_version;
- uint32_t rlc_srlc_feature_version;
- uint32_t rlc_srlg_fw_version;
- uint32_t rlc_srlg_feature_version;
- uint32_t rlc_srls_fw_version;
- uint32_t rlc_srls_feature_version;
- uint32_t mec_feature_version;
- uint32_t mec2_feature_version;
- struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
- unsigned num_gfx_rings;
- struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
- unsigned num_compute_rings;
- struct amdgpu_irq_src eop_irq;
- struct amdgpu_irq_src priv_reg_irq;
- struct amdgpu_irq_src priv_inst_irq;
- /* gfx status */
- uint32_t gfx_current_status;
- /* ce ram size*/
- unsigned ce_ram_size;
- struct amdgpu_cu_info cu_info;
- const struct amdgpu_gfx_funcs *funcs;
-
- /* reset mask */
- uint32_t grbm_soft_reset;
- uint32_t srbm_soft_reset;
- /* s3/s4 mask */
- bool in_suspend;
- /* NGG */
- struct amdgpu_ngg ngg;
-
- /* pipe reservation */
- struct mutex pipe_reserve_mutex;
- DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-};
-
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
@@ -1020,6 +554,7 @@ struct amdgpu_cs_parser {
/* scheduler job object */
struct amdgpu_job *job;
+ struct drm_sched_entity *entity;
/* buffer objects */
struct ww_acquire_ctx ticket;
@@ -1041,40 +576,6 @@ struct amdgpu_cs_parser {
struct drm_syncobj **post_dep_syncobjs;
};
-#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
-#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
-#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
-
-struct amdgpu_job {
- struct drm_sched_job base;
- struct amdgpu_device *adev;
- struct amdgpu_vm *vm;
- struct amdgpu_ring *ring;
- struct amdgpu_sync sync;
- struct amdgpu_sync sched_sync;
- struct amdgpu_ib *ibs;
- struct dma_fence *fence; /* the hw fence */
- uint32_t preamble_status;
- uint32_t num_ibs;
- void *owner;
- uint64_t fence_ctx; /* the fence_context this job uses */
- bool vm_needs_flush;
- uint64_t vm_pd_addr;
- unsigned vmid;
- unsigned pasid;
- uint32_t gds_base, gds_size;
- uint32_t gws_base, gws_size;
- uint32_t oa_base, oa_size;
- uint32_t vram_lost_counter;
-
- /* user fence handling */
- uint64_t uf_addr;
- uint64_t uf_sequence;
-
-};
-#define to_amdgpu_job(sched_job) \
- container_of((sched_job), struct amdgpu_job, base)
-
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
uint32_t ib_idx, int idx)
{
@@ -1105,58 +606,6 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
/*
- * SDMA
- */
-struct amdgpu_sdma_instance {
- /* SDMA firmware */
- const struct firmware *fw;
- uint32_t fw_version;
- uint32_t feature_version;
-
- struct amdgpu_ring ring;
- bool burst_nop;
-};
-
-struct amdgpu_sdma {
- struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
-#ifdef CONFIG_DRM_AMDGPU_SI
- //SI DMA has a difference trap irq number for the second engine
- struct amdgpu_irq_src trap_irq_1;
-#endif
- struct amdgpu_irq_src trap_irq;
- struct amdgpu_irq_src illegal_inst_irq;
- int num_instances;
- uint32_t srbm_soft_reset;
-};
-
-/*
- * Firmware
- */
-enum amdgpu_firmware_load_type {
- AMDGPU_FW_LOAD_DIRECT = 0,
- AMDGPU_FW_LOAD_SMU,
- AMDGPU_FW_LOAD_PSP,
-};
-
-struct amdgpu_firmware {
- struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
- enum amdgpu_firmware_load_type load_type;
- struct amdgpu_bo *fw_buf;
- unsigned int fw_size;
- unsigned int max_ucodes;
- /* firmwares are loaded by psp instead of smu from vega10 */
- const struct amdgpu_psp_funcs *funcs;
- struct amdgpu_bo *rbuf;
- struct mutex mutex;
-
- /* gpu info firmware data pointer */
- const struct firmware *gpu_info_fw;
-
- void *fw_buf_ptr;
- uint64_t fw_buf_mc;
-};
-
-/*
* Benchmarking
*/
void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
@@ -1167,31 +616,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
*/
void amdgpu_test_moves(struct amdgpu_device *adev);
-
-/*
- * amdgpu smumgr functions
- */
-struct amdgpu_smumgr_funcs {
- int (*check_fw_load_finish)(struct amdgpu_device *adev, uint32_t fwtype);
- int (*request_smu_load_fw)(struct amdgpu_device *adev);
- int (*request_smu_specific_fw)(struct amdgpu_device *adev, uint32_t fwtype);
-};
-
-/*
- * amdgpu smumgr
- */
-struct amdgpu_smumgr {
- struct amdgpu_bo *toc_buf;
- struct amdgpu_bo *smu_buf;
- /* asic priv smu data */
- void *priv;
- spinlock_t smu_lock;
- /* smumgr functions */
- const struct amdgpu_smumgr_funcs *smumgr_funcs;
- /* ucode loading complete flag */
- uint32_t fw_flags;
-};
-
/*
* ASIC specific register table accessible by UMD
*/
@@ -1233,23 +657,9 @@ struct amdgpu_asic_funcs {
/*
* IOCTL.
*/
-int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
-int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
-int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
-int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
-int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
-int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
-int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
@@ -1257,9 +667,6 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
-int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp);
-
/* VRAM scratch page for HDP bug, default vram page */
struct amdgpu_vram_scratch {
struct amdgpu_bo *robj;
@@ -1389,6 +796,7 @@ enum amd_hw_ip_block_type {
PWR_HWIP,
NBIF_HWIP,
THM_HWIP,
+ CLK_HWIP,
MAX_HWIP
};
@@ -1543,9 +951,6 @@ struct amdgpu_device {
u32 cg_flags;
u32 pg_flags;
- /* amdgpu smumgr */
- struct amdgpu_smumgr smu;
-
/* gfx */
struct amdgpu_gfx gfx;
@@ -1579,9 +984,9 @@ struct amdgpu_device {
DECLARE_HASHTABLE(mn_hash, 7);
/* tracking pinned memory */
- u64 vram_pin_size;
- u64 invisible_pin_size;
- u64 gart_pin_size;
+ atomic64_t vram_pin_size;
+ atomic64_t visible_pin_size;
+ atomic64_t gart_pin_size;
/* amdkfd interface */
struct kfd_dev *kfd;
@@ -1610,6 +1015,9 @@ struct amdgpu_device {
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
+ /* s3/s4 mask */
+ bool in_suspend;
+
/* record last mm index being written through WREG32*/
unsigned long last_mm_index;
bool in_gpu_reset;
@@ -1732,22 +1140,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8))
#define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16))
-static inline struct amdgpu_sdma_instance *
-amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- int i;
-
- for (i = 0; i < adev->sdma.num_instances; i++)
- if (&adev->sdma.instance[i].ring == ring)
- break;
-
- if (i < AMDGPU_MAX_SDMA_INSTANCES)
- return &adev->sdma.instance[i];
- else
- return NULL;
-}
-
/*
* ASICs macro.
*/
@@ -1766,75 +1158,16 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
-#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
-#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
-#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
-#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
-#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
-#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
-#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
-#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
-#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
-#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
-#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
-#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
-#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
-#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
-#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
-#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
-#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
-#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
-#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
-#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
-#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
-#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
-#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
-#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
-#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
-#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
-#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
-#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
-#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
-#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
-#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
-#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
-#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
-#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
-#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
-#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
-#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
-#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
-#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
-#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
-#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
-#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
-#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
-#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
-#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
-#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
/* Common functions */
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job* job, bool force);
+ struct amdgpu_job* job);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
-void amdgpu_display_update_priority(struct amdgpu_device *adev);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
-void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
-bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
-void amdgpu_device_vram_location(struct amdgpu_device *adev,
- struct amdgpu_gmc *mc, u64 base);
-void amdgpu_device_gart_location(struct amdgpu_device *adev,
- struct amdgpu_gmc *mc);
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
@@ -1885,6 +1218,12 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
+
+/*
+ * functions used by amdgpu_xgmi.c
+ */
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
+
/*
* functions used by amdgpu_encoder.c
*/
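
The amdgpu.h hunk above converts the pinned-memory counters (vram_pin_size, visible_pin_size, gart_pin_size) from plain u64 fields to atomic64_t. The sketch below, which is not part of this patch and uses hypothetical helper names, shows the usual lockless access pattern such a conversion enables; it assumes the amdgpu driver headers for struct amdgpu_device.

#include <linux/atomic.h>
#include "amdgpu.h"	/* assumed context: provides struct amdgpu_device */

/* Hypothetical helper, for illustration only: adjust the pinned-VRAM
 * counter when a BO is pinned (delta > 0) or unpinned (delta < 0).
 */
static void example_account_vram_pin(struct amdgpu_device *adev, s64 delta)
{
	atomic64_add(delta, &adev->vram_pin_size);
}

/* Hypothetical helper: read the counter without taking any lock. */
static u64 example_query_vram_pin(struct amdgpu_device *adev)
{
	return atomic64_read(&adev->vram_pin_size);
}

Because updates and reads go through atomic64_add()/atomic64_read(), callers no longer need a shared lock just to keep these accounting fields consistent, which appears to be the point of the type change.
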
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index f4c474a95875..297a5490ad8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -57,6 +57,10 @@
#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8
#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c
#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68
+#define ACP_BT_PLAY_REGS_START 0x14970
+#define ACP_BT_PLAY_REGS_END 0x14a24
+#define ACP_BT_COMP1_REG_OFFSET 0xac
+#define ACP_BT_COMP2_REG_OFFSET 0xa8
#define mmACP_PGFSM_RETAIN_REG 0x51c9
#define mmACP_PGFSM_CONFIG_REG 0x51ca
@@ -77,7 +81,7 @@
#define ACP_SOFT_RESET_DONE_TIME_OUT_VALUE 0x000000FF
#define ACP_TIMEOUT_LOOP 0x000000FF
-#define ACP_DEVS 3
+#define ACP_DEVS 4
#define ACP_SRC_ID 162
enum {
@@ -112,136 +116,47 @@ static int acp_sw_fini(void *handle)
return 0;
}
-/* power off a tile/block within ACP */
-static int acp_suspend_tile(void *cgs_dev, int tile)
-{
- u32 val = 0;
- u32 count = 0;
-
- if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
- pr_err("Invalid ACP tile : %d to suspend\n", tile);
- return -1;
- }
-
- val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
- val &= ACP_TILE_ON_MASK;
-
- if (val == 0x0) {
- val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
- val = val | (1 << tile);
- cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
- cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
- 0x500 + tile);
-
- count = ACP_TIMEOUT_LOOP;
- while (true) {
- val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
- + tile);
- val = val & ACP_TILE_ON_MASK;
- if (val == ACP_TILE_OFF_MASK)
- break;
- if (--count == 0) {
- pr_err("Timeout reading ACP PGFSM status\n");
- return -ETIMEDOUT;
- }
- udelay(100);
- }
-
- val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
-
- val |= ACP_TILE_OFF_RETAIN_REG_MASK;
- cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
- }
- return 0;
-}
-
-/* power on a tile/block within ACP */
-static int acp_resume_tile(void *cgs_dev, int tile)
-{
- u32 val = 0;
- u32 count = 0;
-
- if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
- pr_err("Invalid ACP tile to resume\n");
- return -1;
- }
-
- val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
- val = val & ACP_TILE_ON_MASK;
-
- if (val != 0x0) {
- cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
- 0x600 + tile);
- count = ACP_TIMEOUT_LOOP;
- while (true) {
- val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
- + tile);
- val = val & ACP_TILE_ON_MASK;
- if (val == 0x0)
- break;
- if (--count == 0) {
- pr_err("Timeout reading ACP PGFSM status\n");
- return -ETIMEDOUT;
- }
- udelay(100);
- }
- val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
- if (tile == ACP_TILE_P1)
- val = val & (ACP_TILE_P1_MASK);
- else if (tile == ACP_TILE_P2)
- val = val & (ACP_TILE_P2_MASK);
-
- cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
- }
- return 0;
-}
-
struct acp_pm_domain {
- void *cgs_dev;
+ void *adev;
struct generic_pm_domain gpd;
};
static int acp_poweroff(struct generic_pm_domain *genpd)
{
- int i, ret;
struct acp_pm_domain *apd;
+ struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) {
- /* Donot return abruptly if any of power tile fails to suspend.
- * Log it and continue powering off other tile
- */
- for (i = 4; i >= 0 ; i--) {
- ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
- if (ret)
- pr_err("ACP tile %d tile suspend failed\n", i);
- }
+ adev = apd->adev;
+ /* call smu to POWER GATE ACP block
+ * smu will
+ * 1. turn off the acp clock
+ * 2. power off the acp tiles
+ * 3. check and enter ulv state
+ */
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
}
return 0;
}
static int acp_poweron(struct generic_pm_domain *genpd)
{
- int i, ret;
struct acp_pm_domain *apd;
+ struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) {
- for (i = 0; i < 2; i++) {
- ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i);
- if (ret) {
- pr_err("ACP tile %d resume failed\n", i);
- break;
- }
- }
-
- /* Disable DSPs which are not going to be used */
- for (i = 0; i < 3; i++) {
- ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
- /* Continue suspending other DSP, even if one fails */
- if (ret)
- pr_err("ACP DSP %d suspend failed\n", i);
- }
+ adev = apd->adev;
+ /* call smu to UNGATE ACP block
+ * smu will
+ * 1. exit ulv
+ * 2. turn on acp clock
+ * 3. power on acp tiles
+ */
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
}
return 0;
}
@@ -285,30 +200,31 @@ static int acp_hw_init(void *handle)
r = amd_acp_hw_init(adev->acp.cgs_device,
ip_block->version->major, ip_block->version->minor);
/* -ENODEV means board uses AZ rather than ACP */
- if (r == -ENODEV)
+ if (r == -ENODEV) {
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0;
- else if (r)
+ } else if (r) {
return r;
+ }
if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
return -EINVAL;
acp_base = adev->rmmio_base;
- if (adev->asic_type != CHIP_STONEY) {
- adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
- if (adev->acp.acp_genpd == NULL)
- return -ENOMEM;
- adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
- adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
- adev->acp.acp_genpd->gpd.power_on = acp_poweron;
+ adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
+ if (adev->acp.acp_genpd == NULL)
+ return -ENOMEM;
+ adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
+ adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
+ adev->acp.acp_genpd->gpd.power_on = acp_poweron;
- adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device;
- pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
- }
+ adev->acp.acp_genpd->adev = adev;
+
+ pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL);
@@ -316,14 +232,13 @@ static int acp_hw_init(void *handle)
if (adev->acp.acp_cell == NULL)
return -ENOMEM;
- adev->acp.acp_res = kcalloc(4, sizeof(struct resource), GFP_KERNEL);
-
+ adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
if (adev->acp.acp_res == NULL) {
kfree(adev->acp.acp_cell);
return -ENOMEM;
}
- i2s_pdata = kcalloc(2, sizeof(struct i2s_platform_data), GFP_KERNEL);
+ i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
if (i2s_pdata == NULL) {
kfree(adev->acp.acp_res);
kfree(adev->acp.acp_cell);
@@ -358,6 +273,20 @@ static int acp_hw_init(void *handle)
i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+ i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ switch (adev->asic_type) {
+ case CHIP_STONEY:
+ i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
+ break;
+ default:
+ break;
+ }
+
+ i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
+ i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
+ i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
+
adev->acp.acp_res[0].name = "acp2x_dma";
adev->acp.acp_res[0].flags = IORESOURCE_MEM;
adev->acp.acp_res[0].start = acp_base;
@@ -373,13 +302,18 @@ static int acp_hw_init(void *handle)
adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
- adev->acp.acp_res[3].name = "acp2x_dma_irq";
- adev->acp.acp_res[3].flags = IORESOURCE_IRQ;
- adev->acp.acp_res[3].start = amdgpu_irq_create_mapping(adev, 162);
- adev->acp.acp_res[3].end = adev->acp.acp_res[3].start;
+ adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
+ adev->acp.acp_res[3].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
+ adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
+
+ adev->acp.acp_res[4].name = "acp2x_dma_irq";
+ adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
adev->acp.acp_cell[0].name = "acp_audio_dma";
- adev->acp.acp_cell[0].num_resources = 4;
+ adev->acp.acp_cell[0].num_resources = 5;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
@@ -396,22 +330,27 @@ static int acp_hw_init(void *handle)
adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+ adev->acp.acp_cell[3].name = "designware-i2s";
+ adev->acp.acp_cell[3].num_resources = 1;
+ adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
+ adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
+ adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
+
r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
ACP_DEVS);
if (r)
return r;
- if (adev->asic_type != CHIP_STONEY) {
- for (i = 0; i < ACP_DEVS ; i++) {
- dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
- r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
- if (r) {
- dev_err(dev, "Failed to add dev to genpd\n");
- return r;
- }
+ for (i = 0; i < ACP_DEVS ; i++) {
+ dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+ r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
+ if (r) {
+ dev_err(dev, "Failed to add dev to genpd\n");
+ return r;
}
}
+
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@@ -451,7 +390,6 @@ static int acp_hw_init(void *handle)
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
val &= ~ACP_SOFT_RESET__SoftResetAud_MASK;
cgs_write_register(adev->acp.cgs_device, mmACP_SOFT_RESET, val);
-
return 0;
}
@@ -470,8 +408,10 @@ static int acp_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* return early if no ACP */
- if (!adev->acp.acp_cell)
+ if (!adev->acp.acp_genpd) {
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0;
+ }
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@@ -509,19 +449,17 @@ static int acp_hw_fini(void *handle)
udelay(100);
}
- if (adev->acp.acp_genpd) {
- for (i = 0; i < ACP_DEVS ; i++) {
- dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
- ret = pm_genpd_remove_device(dev);
- /* If removal fails, dont giveup and try rest */
- if (ret)
- dev_err(dev, "remove dev from genpd failed\n");
- }
- kfree(adev->acp.acp_genpd);
+ for (i = 0; i < ACP_DEVS ; i++) {
+ dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+ ret = pm_genpd_remove_device(dev);
+ /* If removal fails, dont giveup and try rest */
+ if (ret)
+ dev_err(dev, "remove dev from genpd failed\n");
}
mfd_remove_devices(adev->acp.parent);
kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_genpd);
kfree(adev->acp.acp_cell);
return 0;
@@ -529,11 +467,21 @@ static int acp_hw_fini(void *handle)
static int acp_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* power up on suspend */
+ if (!adev->acp.acp_cell)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0;
}
static int acp_resume(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* power down again on resume */
+ if (!adev->acp.acp_cell)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0;
}
@@ -566,6 +514,12 @@ static int acp_set_clockgating_state(void *handle,
static int acp_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = state == AMD_PG_STATE_GATE ? true : false;
+
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
+
return 0;
}
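
The amdgpu_acp.c changes above drop the old per-tile PGFSM register sequencing (acp_suspend_tile/acp_resume_tile) and instead ask the SMU to power-gate the whole ACP block. A minimal sketch of the call pattern that acp_poweroff(), acp_poweron() and acp_set_powergating_state() now share; the helper name is hypothetical and the patch open-codes the check at each call site rather than using such a helper.

#include "amdgpu.h"	/* assumed context: amdgpu driver headers */

/* Illustrative consolidation of the pattern used in the hunks above. */
static void example_acp_set_powergating(struct amdgpu_device *adev, bool gate)
{
	/* The SMU handles the clock, the tile power state and ULV
	 * entry/exit on the driver's behalf.
	 */
	if (adev->powerplay.pp_funcs->set_powergating_by_smu)
		amdgpu_dpm_set_powergating_by_smu(adev,
						  AMD_IP_BLOCK_TYPE_ACP, gate);
}
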
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 0d8c3fc6eace..7f0afc526419 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -31,6 +31,7 @@
#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
+#include "amdgpu_display.h"
#include "amd_acpi.h"
#include "atom.h"
@@ -358,13 +359,14 @@ out:
*
* Checks the acpi event and if it matches an atif event,
* handles it.
- * Returns NOTIFY code
+ *
+ * Returns:
+ * NOTIFY_BAD or NOTIFY_DONE, depending on the event.
*/
static int amdgpu_atif_handler(struct amdgpu_device *adev,
struct acpi_bus_event *event)
{
struct amdgpu_atif *atif = adev->atif;
- struct atif_sbios_requests req;
int count;
DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n",
@@ -373,48 +375,59 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0)
return NOTIFY_DONE;
+ /* Is this actually our event? */
if (!atif ||
!atif->notification_cfg.enabled ||
- event->type != atif->notification_cfg.command_code)
- /* Not our event */
- return NOTIFY_DONE;
+ event->type != atif->notification_cfg.command_code) {
+ /* These events will generate keypresses otherwise */
+ if (event->type == ACPI_VIDEO_NOTIFY_PROBE)
+ return NOTIFY_BAD;
+ else
+ return NOTIFY_DONE;
+ }
- /* Check pending SBIOS requests */
- count = amdgpu_atif_get_sbios_requests(atif, &req);
+ if (atif->functions.sbios_requests) {
+ struct atif_sbios_requests req;
- if (count <= 0)
- return NOTIFY_DONE;
+ /* Check pending SBIOS requests */
+ count = amdgpu_atif_get_sbios_requests(atif, &req);
- DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
+ if (count <= 0)
+ return NOTIFY_BAD;
- if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
- struct amdgpu_encoder *enc = atif->encoder_for_bl;
+ DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
- if (enc) {
- struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+ /* todo: add DC handling */
+ if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
+ !amdgpu_device_has_dc_support(adev)) {
+ struct amdgpu_encoder *enc = atif->encoder_for_bl;
- DRM_DEBUG_DRIVER("Changing brightness to %d\n",
- req.backlight_level);
+ if (enc) {
+ struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
+
+ DRM_DEBUG_DRIVER("Changing brightness to %d\n",
+ req.backlight_level);
- amdgpu_display_backlight_set_level(adev, enc, req.backlight_level);
+ amdgpu_display_backlight_set_level(adev, enc, req.backlight_level);
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
- backlight_force_update(dig->bl_dev,
- BACKLIGHT_UPDATE_HOTKEY);
+ backlight_force_update(dig->bl_dev,
+ BACKLIGHT_UPDATE_HOTKEY);
#endif
+ }
}
- }
- if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
- if ((adev->flags & AMD_IS_PX) &&
- amdgpu_atpx_dgpu_req_power_for_displays()) {
- pm_runtime_get_sync(adev->ddev->dev);
- /* Just fire off a uevent and let userspace tell us what to do */
- drm_helper_hpd_irq_event(adev->ddev);
- pm_runtime_mark_last_busy(adev->ddev->dev);
- pm_runtime_put_autosuspend(adev->ddev->dev);
+ if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
+ if ((adev->flags & AMD_IS_PX) &&
+ amdgpu_atpx_dgpu_req_power_for_displays()) {
+ pm_runtime_get_sync(adev->ddev->dev);
+ /* Just fire off a uevent and let userspace tell us what to do */
+ drm_helper_hpd_irq_event(adev->ddev);
+ pm_runtime_mark_last_busy(adev->ddev->dev);
+ pm_runtime_put_autosuspend(adev->ddev->dev);
+ }
}
+ /* TODO: check other events */
}
- /* TODO: check other events */
/* We've handled the event, stop the notifier chain. The ACPI interface
* overloads ACPI_VIDEO_NOTIFY_PROBE, we don't want to send that to
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 305143fcc1ce..c31a8849e9f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,7 +28,6 @@
#include <linux/module.h>
const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
static const unsigned int compute_vmid_bitmap = 0xFF00;
@@ -36,45 +35,23 @@ int amdgpu_amdkfd_init(void)
{
int ret;
-#if defined(CONFIG_HSA_AMD_MODULE)
- int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
-
- kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
- if (kgd2kfd_init_p == NULL)
- return -ENOENT;
-
- ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
- if (ret) {
- symbol_put(kgd2kfd_init);
- kgd2kfd = NULL;
- }
-
-
-#elif defined(CONFIG_HSA_AMD)
-
+#ifdef CONFIG_HSA_AMD
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret)
kgd2kfd = NULL;
-
+ amdgpu_amdkfd_gpuvm_init_mem_limits();
#else
kgd2kfd = NULL;
ret = -ENOENT;
#endif
-#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
- amdgpu_amdkfd_gpuvm_init_mem_limits();
-#endif
-
return ret;
}
void amdgpu_amdkfd_fini(void)
{
- if (kgd2kfd) {
+ if (kgd2kfd)
kgd2kfd->exit();
- symbol_put(kgd2kfd_init);
- }
}
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
@@ -99,6 +76,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
+ case CHIP_VEGA20:
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
@@ -146,7 +124,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
- int i;
+ int i, n;
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
@@ -155,7 +133,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_VA_HOLE_START),
+ AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev->ddev->render->index
};
@@ -185,7 +163,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
- if (adev->asic_type >= CHIP_VEGA10) {
+
+ if (adev->asic_type < CHIP_VEGA10) {
+ kgd2kfd->device_init(adev->kfd, &gpu_resources);
+ return;
+ }
+
+ n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
+
+ for (i = 0; i < n; i += 2) {
/* On SOC15 the BIF is involved in routing
* doorbells using the low 12 bits of the
* address. Communicate the assignments to
@@ -193,20 +179,31 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
- gpu_resources.sdma_doorbell[0][0] =
- AMDGPU_DOORBELL64_sDMA_ENGINE0;
- gpu_resources.sdma_doorbell[0][1] =
- AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
- gpu_resources.sdma_doorbell[1][0] =
- AMDGPU_DOORBELL64_sDMA_ENGINE1;
- gpu_resources.sdma_doorbell[1][1] =
- AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
- /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
- * SDMA, IH and VCN. So don't use them for the CP.
- */
- gpu_resources.reserved_doorbell_mask = 0x1f0;
- gpu_resources.reserved_doorbell_val = 0x0f0;
+ if (adev->asic_type == CHIP_VEGA10) {
+ gpu_resources.sdma_doorbell[0][i] =
+ AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
+ gpu_resources.sdma_doorbell[0][i+1] =
+ AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
+ gpu_resources.sdma_doorbell[1][i] =
+ AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
+ gpu_resources.sdma_doorbell[1][i+1] =
+ AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
+ } else {
+ gpu_resources.sdma_doorbell[0][i] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
+ gpu_resources.sdma_doorbell[0][i+1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
+ gpu_resources.sdma_doorbell[1][i] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
+ gpu_resources.sdma_doorbell[1][i+1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
+ }
}
+ /* Doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved for
+ * SDMA, IH and VCN. So don't use them for the CP.
+ */
+ gpu_resources.reserved_doorbell_mask = 0x1e0;
+ gpu_resources.reserved_doorbell_val = 0x0e0;
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
@@ -243,15 +240,42 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
return r;
}
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd)
+ r = kgd2kfd->pre_reset(adev->kfd);
+
+ return r;
+}
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd)
+ r = kgd2kfd->post_reset(adev->kfd);
+
+ return r;
+}
+
+void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ if (amdgpu_device_should_recover_gpu(adev))
+ amdgpu_device_gpu_recover(adev, NULL);
+}
+
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
- void **cpu_ptr)
+ void **cpu_ptr, bool mqd_gfx9)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_param bp;
int r;
- uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;
memset(&bp, 0, sizeof(bp));
@@ -261,6 +285,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
+
+ if (mqd_gfx9)
+ bp.flags |= AMDGPU_GEM_CREATE_MQD_GFX9;
+
r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
dev_err(adev->dev,
@@ -275,13 +303,18 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
goto allocate_mem_reserve_bo_failed;
}
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
- &gpu_addr_tmp);
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", bo);
+ goto allocate_mem_kmap_bo_failed;
+ }
+
r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
if (r) {
dev_err(adev->dev,
@@ -290,7 +323,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
}
*mem_obj = bo;
- *gpu_addr = gpu_addr_tmp;
+ *gpu_addr = amdgpu_bo_gpu_offset(bo);
*cpu_ptr = cpu_ptr_tmp;
amdgpu_bo_unreserve(bo);
@@ -402,6 +435,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}
+uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ return adev->gmc.xgmi.hive_id;
+}
+
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len)
@@ -457,6 +497,14 @@ err:
return ret;
}
+void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ amdgpu_dpm_switch_power_profile(adev,
+ PP_SMC_POWER_PROFILE_COMPUTE, !idle);
+}
+
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
if (adev->kfd) {
@@ -467,7 +515,7 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
-#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
+#ifndef CONFIG_HSA_AMD
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
return false;
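
The doorbell hunk above pairs two doorbell slots per SDMA queue (the second slot of each pair sits 0x200 above the first) and widens the loop from 2 slots per engine on Vega10 to 8 on Vega20 and later. A minimal stand-alone sketch of just that index arithmetic, using made-up base values instead of the real AMDGPU_DOORBELL64_sDMA_ENGINE* constants:

#include <stdio.h>

/* Hypothetical doorbell base indices, purely for illustration; the real
 * AMDGPU_DOORBELL64_sDMA_ENGINE* values live in the amdgpu headers. */
#define SDMA_ENGINE0_BASE 0x0
#define SDMA_ENGINE1_BASE 0x8

int main(void)
{
	/* Vega20-class parts expose 8 doorbell slots per SDMA engine,
	 * older SOC15 parts only 2; mirror the kernel loop above. */
	int n = 8;
	unsigned int db[2][8];

	for (int i = 0; i < n; i += 2) {
		/* Each 64-bit doorbell assignment is used twice: once at
		 * base + queue and once at base + 0x200 + queue. */
		db[0][i]     = SDMA_ENGINE0_BASE + (i >> 1);
		db[0][i + 1] = SDMA_ENGINE0_BASE + 0x200 + (i >> 1);
		db[1][i]     = SDMA_ENGINE1_BASE + (i >> 1);
		db[1][i + 1] = SDMA_ENGINE1_BASE + 0x200 + (i >> 1);
	}

	for (int e = 0; e < 2; e++)
		for (int i = 0; i < n; i++)
			printf("engine %d slot %d -> 0x%03x\n", e, i, db[e][i]);
	return 0;
}

Running it shows each (i >> 1) queue index reused twice per engine, which is what lets a 64-bit doorbell assignment be shared between the two halves.
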
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a8418a3f4e9d..8e0d4f7196b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
+void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
@@ -126,10 +127,16 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
+
+void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
+
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
- void **cpu_ptr);
+ void **cpu_ptr, bool mqd_gfx9);
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
void get_local_mem_info(struct kgd_dev *kgd,
struct kfd_local_mem_info *mem_info);
@@ -138,6 +145,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
+uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
#define read_user_wptr(mmptr, wptr, dst) \
({ \
@@ -155,17 +163,18 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
})
/* GPUVM API */
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
- void **process_info,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
+ void **vm, void **process_info,
struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp,
+ struct file *filp, unsigned int pasid,
void **vm, void **process_info,
struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
@@ -183,6 +192,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ struct kfd_vm_fault_info *info);
+
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 2c14025e5e76..574c1181ae9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -173,7 +173,5 @@ static const struct dma_fence_ops amdkfd_fence_ops = {
.get_driver_name = amdkfd_fence_get_driver_name,
.get_timeline_name = amdkfd_fence_get_timeline_name,
.enable_signaling = amdkfd_fence_enable_signaling,
- .signaled = NULL,
- .wait = dma_fence_default_wait,
.release = amdkfd_fence_release,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea79908dac4c..244d9834a381 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -142,9 +142,10 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t page_table_base);
+ uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
@@ -204,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@@ -216,6 +218,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .gpu_recover = amdgpu_amdkfd_gpu_reset,
+ .set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -571,6 +577,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
unsigned long flags, end_jiffies;
int retry;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
acquire_queue(kgd, pipe_id, queue_id);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
@@ -677,7 +686,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
while (true) {
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
- if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies))
return -ETIME;
@@ -865,7 +874,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t page_table_base)
+ uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -873,7 +882,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
pr_err("trying to set page table base for wrong VMID\n");
return;
}
- WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+ lower_32_bits(page_table_base));
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
@@ -882,6 +892,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
unsigned int tmp;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
@@ -911,3 +924,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
+
+/**
+ * read_vmid_from_vmfault_reg - read vmid from register
+ *
+ * @kgd: kgd device pointer
+ *
+ * Read the VMID from the VM protection fault status register (CIK).
+ */
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+
+ return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 19dd665e7307..9f149914ad6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -45,8 +45,6 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};
-struct vi_sdma_mqd;
-
/*
* Register access functions
*/
@@ -100,7 +98,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t page_table_base);
+ uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
@@ -164,6 +162,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@@ -176,6 +175,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
+ .gpu_recover = amdgpu_amdkfd_gpu_reset,
+ .set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -278,7 +280,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
lock_srbm(kgd, mec, pipe, 0, 0);
- WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
unlock_srbm(kgd);
@@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry;
struct vi_mqd *m = get_mqd(mqd);
+ if (adev->in_gpu_reset)
+ return -EIO;
+
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
@@ -827,7 +833,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t page_table_base)
+ uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -835,7 +841,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
pr_err("trying to set page table base for wrong VMID\n");
return;
}
- WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
+ lower_32_bits(page_table_base));
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
@@ -844,6 +851,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
unsigned int tmp;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 1db60aa5b7f0..42cb4c4e0929 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -138,7 +138,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t page_table_base);
+ uint64_t page_table_base);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
@@ -201,6 +201,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm,
.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
@@ -213,6 +214,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
+ .gpu_recover = amdgpu_amdkfd_gpu_reset,
+ .set_compute_idle = amdgpu_amdkfd_set_compute_idle,
+ .get_hive_id = amdgpu_amdkfd_get_hive_id,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
@@ -679,6 +683,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t temp;
struct v9_mqd *m = get_mqd(mqd);
+ if (adev->in_gpu_reset)
+ return -EIO;
+
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
@@ -866,6 +873,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
if (ring->ready)
return invalidate_tlbs_with_kiq(adev, pasid);
@@ -1003,11 +1013,10 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t page_table_base)
+ uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
- AMDGPU_PTE_VALID;
+ uint64_t base = page_table_base | AMDGPU_PTE_VALID;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ff8fd75f7ca5..df0a059565f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -206,11 +206,9 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence ***ef_list,
unsigned int *ef_count)
{
- struct reservation_object_list *fobj;
- struct reservation_object *resv;
- unsigned int i = 0, j = 0, k = 0, shared_count;
- unsigned int count = 0;
- struct amdgpu_amdkfd_fence **fence_list;
+ struct reservation_object *resv = bo->tbo.resv;
+ struct reservation_object_list *old, *new;
+ unsigned int i, j, k;
if (!ef && !ef_list)
return -EINVAL;
@@ -220,76 +218,67 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
*ef_count = 0;
}
- resv = bo->tbo.resv;
- fobj = reservation_object_get_list(resv);
-
- if (!fobj)
+ old = reservation_object_get_list(resv);
+ if (!old)
return 0;
- preempt_disable();
- write_seqcount_begin(&resv->seq);
+ new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]),
+ GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
- /* Go through all the shared fences in the resevation object. If
- * ef is specified and it exists in the list, remove it and reduce the
- * count. If ef is not specified, then get the count of eviction fences
- * present.
+ /* Go through all the shared fences in the reservation object and sort
+ * the interesting ones to the end of the list.
*/
- shared_count = fobj->shared_count;
- for (i = 0; i < shared_count; ++i) {
+ for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
struct dma_fence *f;
- f = rcu_dereference_protected(fobj->shared[i],
+ f = rcu_dereference_protected(old->shared[i],
reservation_object_held(resv));
- if (ef) {
- if (f->context == ef->base.context) {
- dma_fence_put(f);
- fobj->shared_count--;
- } else {
- RCU_INIT_POINTER(fobj->shared[j++], f);
- }
- } else if (to_amdgpu_amdkfd_fence(f))
- count++;
+ if ((ef && f->context == ef->base.context) ||
+ (!ef && to_amdgpu_amdkfd_fence(f)))
+ RCU_INIT_POINTER(new->shared[--j], f);
+ else
+ RCU_INIT_POINTER(new->shared[k++], f);
}
- write_seqcount_end(&resv->seq);
- preempt_enable();
+ new->shared_max = old->shared_max;
+ new->shared_count = k;
- if (ef || !count)
- return 0;
+ if (!ef) {
+ unsigned int count = old->shared_count - j;
- /* Alloc memory for count number of eviction fence pointers. Fill the
- * ef_list array and ef_count
- */
- fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
- GFP_KERNEL);
- if (!fence_list)
- return -ENOMEM;
-
- preempt_disable();
- write_seqcount_begin(&resv->seq);
-
- j = 0;
- for (i = 0; i < shared_count; ++i) {
- struct dma_fence *f;
- struct amdgpu_amdkfd_fence *efence;
-
- f = rcu_dereference_protected(fobj->shared[i],
- reservation_object_held(resv));
+ /* Allocate memory for 'count' eviction fence pointers and fill
+ * the ef_list array and ef_count.
+ */
+ *ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
+ *ef_count = count;
- efence = to_amdgpu_amdkfd_fence(f);
- if (efence) {
- fence_list[k++] = efence;
- fobj->shared_count--;
- } else {
- RCU_INIT_POINTER(fobj->shared[j++], f);
+ if (!*ef_list) {
+ kfree(new);
+ return -ENOMEM;
}
}
+ /* Install the new fence list, seqcount provides the barriers */
+ preempt_disable();
+ write_seqcount_begin(&resv->seq);
+ RCU_INIT_POINTER(resv->fence, new);
write_seqcount_end(&resv->seq);
preempt_enable();
- *ef_list = fence_list;
- *ef_count = k;
+ /* Drop the references to the removed fences or move them to ef_list */
+ for (i = j, k = 0; i < old->shared_count; ++i) {
+ struct dma_fence *f;
+
+ f = rcu_dereference_protected(new->shared[i],
+ reservation_object_held(resv));
+ if (!ef)
+ (*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
+ else
+ dma_fence_put(f);
+ }
+ kfree_rcu(old, rcu);
return 0;
}
@@ -334,7 +323,7 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@@ -375,7 +364,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
struct amdgpu_vm_parser param;
- uint64_t addr, flags = AMDGPU_PTE_VALID;
int ret;
param.domain = AMDGPU_GEM_DOMAIN_VRAM;
@@ -394,9 +382,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret;
}
- addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
- amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
- vm->pd_phys_addr = addr;
+ vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
if (vm->use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL);
@@ -622,7 +608,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
- amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
pr_err("%s: failed to validate BO\n", __func__);
@@ -689,7 +675,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
if (!ctx->vm_pd)
return -ENOMEM;
- ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
@@ -754,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
return -ENOMEM;
}
- ctx->kfd_bo.robj = bo;
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.shared = true;
@@ -1014,8 +998,8 @@ create_evict_fence_fail:
return ret;
}
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
- void **process_info,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
+ void **vm, void **process_info,
struct dma_fence **ef)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
@@ -1027,7 +1011,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
return -ENOMEM;
/* Initialize AMDGPU part of the VM */
- ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+ ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid);
if (ret) {
pr_err("Failed init vm ret %d\n", ret);
goto amdgpu_vm_init_fail;
@@ -1050,7 +1034,7 @@ amdgpu_vm_init_fail:
}
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
- struct file *filp,
+ struct file *filp, unsigned int pasid,
void **vm, void **process_info,
struct dma_fence **ef)
{
@@ -1065,7 +1049,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
return -EINVAL;
/* Convert VM into a compute VM */
- ret = amdgpu_vm_make_compute(adev, avm);
+ ret = amdgpu_vm_make_compute(adev, avm, pasid);
if (ret)
return ret;
@@ -1128,11 +1112,34 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
kfree(vm);
}
-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+ if (WARN_ON(!kgd || !vm))
+ return;
+
+ pr_debug("Releasing process vm %p\n", vm);
+
+ /* The original pasid of the amdgpu vm has already been
+ * released when the amdgpu vm was converted to a compute vm.
+ * The current pasid is managed by kfd and will be
+ * released on kfd process destroy. Set the amdgpu pasid
+ * to 0 to avoid a duplicate release.
+ */
+ amdgpu_vm_release_compute(adev, avm);
+}
+
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_bo *pd = avm->root.base.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
- return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ if (adev->asic_type < CHIP_VEGA10)
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ return avm->pd_phys_addr;
}
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
@@ -1587,7 +1594,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
goto bo_reserve_failed;
}
- ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (ret) {
pr_err("Failed to pin bo. ret %d\n", ret);
goto pin_failed;
@@ -1621,6 +1628,20 @@ bo_reserve_failed:
return ret;
}
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ struct kfd_vm_fault_info *mem)
+{
+ struct amdgpu_device *adev;
+
+ adev = (struct amdgpu_device *)kgd;
+ if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+ *mem = *adev->gmc.vm_fault_info;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ }
+ return 0;
+}
+
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
@@ -1680,7 +1701,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
if (amdgpu_bo_reserve(bo, true))
return -EAGAIN;
- amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (ret) {
@@ -1824,7 +1845,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
if (mem->user_pages[0]) {
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
mem->user_pages);
- amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
pr_err("%s: failed to validate BO\n", __func__);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index bf872f694f50..e02781b37e73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -29,6 +29,7 @@
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
#include "atom.h"
#include "atom-bits.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 236915849cfe..b61e1dc61b4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -117,6 +117,10 @@ union igp_info {
union umc_info {
struct atom_umc_info_v3_1 v31;
};
+
+union vram_info {
+ struct atom_vram_info_header_v2_3 v23;
+};
/*
* Return vram width from integrated system info table, if available,
* or 0 if not.
@@ -174,7 +178,7 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_GDDR5:
vram_type = AMDGPU_VRAM_TYPE_GDDR5;
break;
- case ATOM_DGPU_VRAM_TYPE_HBM:
+ case ATOM_DGPU_VRAM_TYPE_HBM2:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
default:
@@ -195,7 +199,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
int index;
u16 data_offset, size;
union igp_info *igp_info;
- union umc_info *umc_info;
+ union vram_info *vram_info;
u8 frev, crev;
u8 mem_type;
@@ -204,7 +208,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
integratedsysteminfo);
else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
- umc_info);
+ vram_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@@ -219,11 +223,11 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
return 0;
}
} else {
- umc_info = (union umc_info *)
+ vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
- case 1:
- mem_type = umc_info->v31.vram_type;
+ case 3:
+ mem_type = vram_info->v23.vram_module[0].memory_type;
return convert_atom_mem_type_to_vram_type(adev, mem_type);
default:
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 9ab89371d9e8..a028661d9e20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -32,7 +32,7 @@ struct amdgpu_atpx_functions {
bool switch_start;
bool switch_end;
bool disp_connectors_mapping;
- bool disp_detetion_ports;
+ bool disp_detection_ports;
};
struct amdgpu_atpx {
@@ -162,7 +162,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
f->switch_start = mask & ATPX_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION_SUPPORTED;
f->switch_end = mask & ATPX_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION_SUPPORTED;
f->disp_connectors_mapping = mask & ATPX_GET_DISPLAY_CONNECTORS_MAPPING_SUPPORTED;
- f->disp_detetion_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
+ f->disp_detection_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
}
/**
@@ -575,6 +575,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 19cfff31f2e1..3079ea8523c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -95,11 +95,17 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_bo_reserve(sobj, false);
if (unlikely(r != 0))
goto out_cleanup;
- r = amdgpu_bo_pin(sobj, sdomain, &saddr);
+ r = amdgpu_bo_pin(sobj, sdomain);
+ if (r) {
+ amdgpu_bo_unreserve(sobj);
+ goto out_cleanup;
+ }
+ r = amdgpu_ttm_alloc_gart(&sobj->tbo);
amdgpu_bo_unreserve(sobj);
if (r) {
goto out_cleanup;
}
+ saddr = amdgpu_bo_gpu_offset(sobj);
bp.domain = ddomain;
r = amdgpu_bo_create(adev, &bp, &dobj);
if (r) {
@@ -108,11 +114,17 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_bo_reserve(dobj, false);
if (unlikely(r != 0))
goto out_cleanup;
- r = amdgpu_bo_pin(dobj, ddomain, &daddr);
+ r = amdgpu_bo_pin(dobj, ddomain);
+ if (r) {
+ amdgpu_bo_unreserve(dobj);
+ goto out_cleanup;
+ }
+ r = amdgpu_ttm_alloc_gart(&dobj->tbo);
amdgpu_bo_unreserve(dobj);
if (r) {
goto out_cleanup;
}
+ daddr = amdgpu_bo_gpu_offset(dobj);
if (adev->mman.buffer_funcs) {
time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 92be7f6de197..14d2982a47cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -35,92 +35,57 @@
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
-static int amdgpu_bo_list_set(struct amdgpu_device *adev,
- struct drm_file *filp,
- struct amdgpu_bo_list *list,
- struct drm_amdgpu_bo_list_entry *info,
- unsigned num_entries);
-
-static void amdgpu_bo_list_release_rcu(struct kref *ref)
+static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
- unsigned i;
- struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
- refcount);
+ struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
+ rhead);
- for (i = 0; i < list->num_entries; ++i)
- amdgpu_bo_unref(&list->array[i].robj);
-
- mutex_destroy(&list->lock);
- kvfree(list->array);
- kfree_rcu(list, rhead);
+ kvfree(list);
}
-static int amdgpu_bo_list_create(struct amdgpu_device *adev,
- struct drm_file *filp,
- struct drm_amdgpu_bo_list_entry *info,
- unsigned num_entries,
- int *id)
+static void amdgpu_bo_list_free(struct kref *ref)
{
- int r;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
- struct amdgpu_bo_list *list;
+ struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
+ refcount);
+ struct amdgpu_bo_list_entry *e;
- list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
- if (!list)
- return -ENOMEM;
+ amdgpu_bo_list_for_each_entry(e, list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- /* initialize bo list*/
- mutex_init(&list->lock);
- kref_init(&list->refcount);
- r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
- if (r) {
- kfree(list);
- return r;
+ amdgpu_bo_unref(&bo);
}
- /* idr alloc should be called only after initialization of bo list. */
- mutex_lock(&fpriv->bo_list_lock);
- r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
- mutex_unlock(&fpriv->bo_list_lock);
- if (r < 0) {
- amdgpu_bo_list_free(list);
- return r;
- }
- *id = r;
-
- return 0;
+ call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
}
-static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
+int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ unsigned num_entries, struct amdgpu_bo_list **result)
{
- struct amdgpu_bo_list *list;
-
- mutex_lock(&fpriv->bo_list_lock);
- list = idr_remove(&fpriv->bo_list_handles, id);
- mutex_unlock(&fpriv->bo_list_lock);
- if (list)
- kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
-}
-
-static int amdgpu_bo_list_set(struct amdgpu_device *adev,
- struct drm_file *filp,
- struct amdgpu_bo_list *list,
- struct drm_amdgpu_bo_list_entry *info,
- unsigned num_entries)
-{
- struct amdgpu_bo_list_entry *array;
- struct amdgpu_bo *gds_obj = adev->gds.gds_gfx_bo;
- struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo;
- struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo;
-
unsigned last_entry = 0, first_userptr = num_entries;
+ struct amdgpu_bo_list_entry *array;
+ struct amdgpu_bo_list *list;
+ uint64_t total_size = 0;
+ size_t size;
unsigned i;
int r;
- unsigned long total_size = 0;
- array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL);
- if (!array)
+ if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list))
+ / sizeof(struct amdgpu_bo_list_entry))
+ return -EINVAL;
+
+ size = sizeof(struct amdgpu_bo_list);
+ size += num_entries * sizeof(struct amdgpu_bo_list_entry);
+ list = kvmalloc(size, GFP_KERNEL);
+ if (!list)
return -ENOMEM;
+
+ kref_init(&list->refcount);
+ list->gds_obj = adev->gds.gds_gfx_bo;
+ list->gws_obj = adev->gds.gws_gfx_bo;
+ list->oa_obj = adev->gds.oa_gfx_bo;
+
+ array = amdgpu_bo_list_array_entry(list, 0);
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
for (i = 0; i < num_entries; ++i) {
@@ -150,66 +115,65 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
entry = &array[last_entry++];
}
- entry->robj = bo;
entry->priority = min(info[i].bo_priority,
AMDGPU_BO_LIST_MAX_PRIORITY);
- entry->tv.bo = &entry->robj->tbo;
- entry->tv.shared = !entry->robj->prime_shared_count;
-
- if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
- gds_obj = entry->robj;
- if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
- gws_obj = entry->robj;
- if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
- oa_obj = entry->robj;
-
- total_size += amdgpu_bo_size(entry->robj);
- trace_amdgpu_bo_list_set(list, entry->robj);
+ entry->tv.bo = &bo->tbo;
+ entry->tv.shared = !bo->prime_shared_count;
+
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
+ list->gds_obj = bo;
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
+ list->gws_obj = bo;
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
+ list->oa_obj = bo;
+
+ total_size += amdgpu_bo_size(bo);
+ trace_amdgpu_bo_list_set(list, bo);
}
- for (i = 0; i < list->num_entries; ++i)
- amdgpu_bo_unref(&list->array[i].robj);
-
- kvfree(list->array);
-
- list->gds_obj = gds_obj;
- list->gws_obj = gws_obj;
- list->oa_obj = oa_obj;
list->first_userptr = first_userptr;
- list->array = array;
list->num_entries = num_entries;
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+
+ *result = list;
return 0;
error_free:
- while (i--)
- amdgpu_bo_unref(&array[i].robj);
- kvfree(array);
+ while (i--) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
+
+ amdgpu_bo_unref(&bo);
+ }
+ kvfree(list);
return r;
+
}
-struct amdgpu_bo_list *
-amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
+static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
{
- struct amdgpu_bo_list *result;
+ struct amdgpu_bo_list *list;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ list = idr_remove(&fpriv->bo_list_handles, id);
+ mutex_unlock(&fpriv->bo_list_lock);
+ if (list)
+ kref_put(&list->refcount, amdgpu_bo_list_free);
+}
+int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
+ struct amdgpu_bo_list **result)
+{
rcu_read_lock();
- result = idr_find(&fpriv->bo_list_handles, id);
+ *result = idr_find(&fpriv->bo_list_handles, id);
- if (result) {
- if (kref_get_unless_zero(&result->refcount)) {
- rcu_read_unlock();
- mutex_lock(&result->lock);
- } else {
- rcu_read_unlock();
- result = NULL;
- }
- } else {
+ if (*result && kref_get_unless_zero(&(*result)->refcount)) {
rcu_read_unlock();
+ return 0;
}
- return result;
+ rcu_read_unlock();
+ return -ENOENT;
}
void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
@@ -220,6 +184,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
* concatenated in descending order.
*/
struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
+ struct amdgpu_bo_list_entry *e;
unsigned i;
for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
@@ -230,14 +195,14 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
* in the list, the sort mustn't change the ordering of buffers
* with the same priority, i.e. it must be stable.
*/
- for (i = 0; i < list->num_entries; i++) {
- unsigned priority = list->array[i].priority;
+ amdgpu_bo_list_for_each_entry(e, list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ unsigned priority = e->priority;
- if (!list->array[i].robj->parent)
- list_add_tail(&list->array[i].tv.head,
- &bucket[priority]);
+ if (!bo->parent)
+ list_add_tail(&e->tv.head, &bucket[priority]);
- list->array[i].user_pages = NULL;
+ e->user_pages = NULL;
}
/* Connect the sorted buckets in the output list. */
@@ -247,71 +212,82 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
- mutex_unlock(&list->lock);
- kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
+ kref_put(&list->refcount, amdgpu_bo_list_free);
}
-void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
-{
- unsigned i;
-
- for (i = 0; i < list->num_entries; ++i)
- amdgpu_bo_unref(&list->array[i].robj);
-
- mutex_destroy(&list->lock);
- kvfree(list->array);
- kfree(list);
-}
-
-int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
- struct drm_file *filp)
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param)
{
+ const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
-
- struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
- union drm_amdgpu_bo_list *args = data;
- uint32_t handle = args->in.list_handle;
- const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
-
struct drm_amdgpu_bo_list_entry *info;
- struct amdgpu_bo_list *list;
-
int r;
- info = kvmalloc_array(args->in.bo_number,
- sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
+ info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
if (!info)
return -ENOMEM;
/* copy the handle array from userspace to a kernel buffer */
r = -EFAULT;
- if (likely(info_size == args->in.bo_info_size)) {
- unsigned long bytes = args->in.bo_number *
- args->in.bo_info_size;
+ if (likely(info_size == in->bo_info_size)) {
+ unsigned long bytes = in->bo_number *
+ in->bo_info_size;
if (copy_from_user(info, uptr, bytes))
goto error_free;
} else {
- unsigned long bytes = min(args->in.bo_info_size, info_size);
+ unsigned long bytes = min(in->bo_info_size, info_size);
unsigned i;
- memset(info, 0, args->in.bo_number * info_size);
- for (i = 0; i < args->in.bo_number; ++i) {
+ memset(info, 0, in->bo_number * info_size);
+ for (i = 0; i < in->bo_number; ++i) {
if (copy_from_user(&info[i], uptr, bytes))
goto error_free;
- uptr += args->in.bo_info_size;
+ uptr += in->bo_info_size;
}
}
+ *info_param = info;
+ return 0;
+
+error_free:
+ kvfree(info);
+ return r;
+}
+
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp)
+{
+ struct amdgpu_device *adev = dev->dev_private;
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+ union drm_amdgpu_bo_list *args = data;
+ uint32_t handle = args->in.list_handle;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+ struct amdgpu_bo_list *list, *old;
+ int r;
+
+ r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+ if (r)
+ goto error_free;
+
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
- &handle);
+ &list);
if (r)
goto error_free;
+
+ mutex_lock(&fpriv->bo_list_lock);
+ r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+ mutex_unlock(&fpriv->bo_list_lock);
+ if (r < 0) {
+ amdgpu_bo_list_put(list);
+ return r;
+ }
+
+ handle = r;
break;
case AMDGPU_BO_LIST_OP_DESTROY:
@@ -320,17 +296,22 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
break;
case AMDGPU_BO_LIST_OP_UPDATE:
- r = -ENOENT;
- list = amdgpu_bo_list_get(fpriv, handle);
- if (!list)
+ r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
+ &list);
+ if (r)
goto error_free;
- r = amdgpu_bo_list_set(adev, filp, list, info,
- args->in.bo_number);
- amdgpu_bo_list_put(list);
- if (r)
+ mutex_lock(&fpriv->bo_list_lock);
+ old = idr_replace(&fpriv->bo_list_handles, list, handle);
+ mutex_unlock(&fpriv->bo_list_lock);
+
+ if (IS_ERR(old)) {
+ amdgpu_bo_list_put(list);
+ r = PTR_ERR(old);
goto error_free;
+ }
+ amdgpu_bo_list_put(old);
break;
default:
@@ -345,6 +326,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
error_free:
- kvfree(info);
+ if (info)
+ kvfree(info);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
new file mode 100644
index 000000000000..7c5f5d1601e6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_BO_LIST_H__
+#define __AMDGPU_BO_LIST_H__
+
+#include <drm/ttm/ttm_execbuf_util.h>
+#include <drm/amdgpu_drm.h>
+
+struct amdgpu_device;
+struct amdgpu_bo;
+struct amdgpu_bo_va;
+struct amdgpu_fpriv;
+
+struct amdgpu_bo_list_entry {
+ struct ttm_validate_buffer tv;
+ struct amdgpu_bo_va *bo_va;
+ uint32_t priority;
+ struct page **user_pages;
+ int user_invalidated;
+};
+
+struct amdgpu_bo_list {
+ struct rcu_head rhead;
+ struct kref refcount;
+ struct amdgpu_bo *gds_obj;
+ struct amdgpu_bo *gws_obj;
+ struct amdgpu_bo *oa_obj;
+ unsigned first_userptr;
+ unsigned num_entries;
+};
+
+int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
+ struct amdgpu_bo_list **result);
+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+ struct list_head *validated);
+void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+ struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+ struct drm_file *filp,
+ struct drm_amdgpu_bo_list_entry *info,
+ unsigned num_entries,
+ struct amdgpu_bo_list **list);
+
+static inline struct amdgpu_bo_list_entry *
+amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index)
+{
+ struct amdgpu_bo_list_entry *array = (void *)&list[1];
+
+ return &array[index];
+}
+
+#define amdgpu_bo_list_for_each_entry(e, list) \
+ for (e = amdgpu_bo_list_array_entry(list, 0); \
+ e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ ++e)
+
+#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
+ for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \
+ e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
+ ++e)
+
+#endif
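
The new header keeps the entry array in the same allocation as struct amdgpu_bo_list: amdgpu_bo_list_array_entry() treats &list[1] as the start of the array, and the for-each macros walk it without a separate pointer member. A stand-alone sketch of that trailing-array pattern with simplified, hypothetical names (not the kernel types):

#include <stdio.h>
#include <stdlib.h>

struct entry {
	unsigned priority;
};

struct bo_list {
	unsigned num_entries;
	/* entries follow in the same allocation */
};

static struct entry *list_array_entry(struct bo_list *list, unsigned index)
{
	/* &list[1] is the first byte after the header, i.e. entry 0 */
	struct entry *array = (void *)&list[1];

	return &array[index];
}

#define list_for_each_entry(e, list) \
	for (e = list_array_entry(list, 0); \
	     e != list_array_entry(list, (list)->num_entries); \
	     ++e)

int main(void)
{
	unsigned n = 4;
	struct bo_list *list;
	struct entry *e;

	/* one allocation for header + entries, like the kvmalloc() above */
	list = malloc(sizeof(*list) + n * sizeof(struct entry));
	if (!list)
		return 1;
	list->num_entries = n;

	list_for_each_entry(e, list)
		e->priority = (unsigned)(e - list_array_entry(list, 0));

	list_for_each_entry(e, list)
		printf("entry %u\n", e->priority);

	free(list);
	return 0;
}

The single kvmalloc() in amdgpu_bo_list_create() above follows the same shape: header size plus num_entries times the entry size, with the overflow check guarding the multiplication.
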
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index e950730f1933..8816c697b205 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -314,17 +314,17 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f)) {
info->is_kicker = true;
- strcpy(fw_name, "radeon/bonaire_k_smc.bin");
+ strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
} else {
- strcpy(fw_name, "radeon/bonaire_smc.bin");
+ strcpy(fw_name, "amdgpu/bonaire_smc.bin");
}
break;
case CHIP_HAWAII:
if (adev->pdev->revision == 0x80) {
info->is_kicker = true;
- strcpy(fw_name, "radeon/hawaii_k_smc.bin");
+ strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
} else {
- strcpy(fw_name, "radeon/hawaii_smc.bin");
+ strcpy(fw_name, "amdgpu/hawaii_smc.bin");
}
break;
case CHIP_TOPAZ:
@@ -367,12 +367,14 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
break;
case CHIP_POLARIS10:
if (type == CGS_UCODE_ID_SMU) {
- if ((adev->pdev->device == 0x67df) &&
- ((adev->pdev->revision == 0xe0) ||
- (adev->pdev->revision == 0xe3) ||
- (adev->pdev->revision == 0xe4) ||
- (adev->pdev->revision == 0xe5) ||
- (adev->pdev->revision == 0xe7) ||
+ if (((adev->pdev->device == 0x67df) &&
+ ((adev->pdev->revision == 0xe0) ||
+ (adev->pdev->revision == 0xe3) ||
+ (adev->pdev->revision == 0xe4) ||
+ (adev->pdev->revision == 0xe5) ||
+ (adev->pdev->revision == 0xe7) ||
+ (adev->pdev->revision == 0xef))) ||
+ ((adev->pdev->device == 0x6fdf) &&
(adev->pdev->revision == 0xef))) {
info->is_kicker = true;
strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 8e66851eb427..69ad6ec0a4f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -34,6 +34,7 @@
#include "atombios_dp.h"
#include "amdgpu_connectors.h"
#include "amdgpu_i2c.h"
+#include "amdgpu_display.h"
#include <linux/pm_runtime.h>
@@ -212,30 +213,21 @@ static void
amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
enum drm_connector_status status)
{
- struct drm_encoder *best_encoder = NULL;
- struct drm_encoder *encoder = NULL;
+ struct drm_encoder *best_encoder;
+ struct drm_encoder *encoder;
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
bool connected;
int i;
best_encoder = connector_funcs->best_encoder(connector);
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if ((encoder == best_encoder) && (status == connector_status_connected))
connected = true;
else
connected = false;
amdgpu_atombios_encoder_set_bios_scratch_regs(connector, encoder, connected);
-
}
}
@@ -246,17 +238,11 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
struct drm_encoder *encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type == encoder_type)
return encoder;
}
+
return NULL;
}
@@ -349,22 +335,24 @@ static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
int ret;
if (amdgpu_connector->edid) {
- drm_mode_connector_update_edid_property(connector, amdgpu_connector->edid);
+ drm_connector_update_edid_property(connector, amdgpu_connector->edid);
ret = drm_add_edid_modes(connector, amdgpu_connector->edid);
return ret;
}
- drm_mode_connector_update_edid_property(connector, NULL);
+ drm_connector_update_edid_property(connector, NULL);
return 0;
}
static struct drm_encoder *
amdgpu_connector_best_single_encoder(struct drm_connector *connector)
{
- int enc_id = connector->encoder_ids[0];
+ struct drm_encoder *encoder;
+ int i;
+
+ /* pick the first one */
+ drm_connector_for_each_possible_encoder(connector, encoder, i)
+ return encoder;
- /* pick the encoder ids */
- if (enc_id)
- return drm_encoder_find(connector->dev, NULL, enc_id);
return NULL;
}
@@ -985,9 +973,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
- struct drm_encoder *encoder = NULL;
const struct drm_encoder_helper_funcs *encoder_funcs;
- int i, r;
+ int r;
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
@@ -1077,14 +1064,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* find analog encoder */
if (amdgpu_connector->dac_load_detect) {
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
- if (!encoder)
- continue;
+ struct drm_encoder *encoder;
+ int i;
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&
encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)
continue;
@@ -1132,18 +1115,11 @@ exit:
static struct drm_encoder *
amdgpu_connector_dvi_encoder(struct drm_connector *connector)
{
- int enc_id = connector->encoder_ids[0];
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct drm_encoder *encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
- if (!encoder)
- continue;
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (amdgpu_connector->use_digital == true) {
if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
return encoder;
@@ -1158,8 +1134,9 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
/* then check use digital */
/* pick the first one */
- if (enc_id)
- return drm_encoder_find(connector->dev, NULL, enc_id);
+ drm_connector_for_each_possible_encoder(connector, encoder, i)
+ return encoder;
+
return NULL;
}
@@ -1296,15 +1273,7 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn
struct amdgpu_encoder *amdgpu_encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
switch (amdgpu_encoder->encoder_id) {
@@ -1326,14 +1295,7 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
int i;
bool found = false;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
- encoder = drm_encoder_find(connector->dev, NULL,
- connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
found = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9c85a90be293..663043c8f0f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -31,45 +31,79 @@
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
uint32_t *offset)
{
struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
unsigned long size;
+ int r;
gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
return -EINVAL;
- p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
- p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+ p->uf_entry.tv.bo = &bo->tbo;
p->uf_entry.tv.shared = true;
p->uf_entry.user_pages = NULL;
- size = amdgpu_bo_size(p->uf_entry.robj);
- if (size != PAGE_SIZE || (data->offset + 8) > size)
- return -EINVAL;
+ drm_gem_object_put_unlocked(gobj);
+
+ size = amdgpu_bo_size(bo);
+ if (size != PAGE_SIZE || (data->offset + 8) > size) {
+ r = -EINVAL;
+ goto error_unref;
+ }
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ r = -EINVAL;
+ goto error_unref;
+ }
*offset = data->offset;
- drm_gem_object_put_unlocked(gobj);
+ return 0;
- if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
- amdgpu_bo_unref(&p->uf_entry.robj);
- return -EINVAL;
- }
+error_unref:
+ amdgpu_bo_unref(&bo);
+ return r;
+}
+
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
+{
+ int r;
+ struct drm_amdgpu_bo_list_entry *info = NULL;
+
+ r = amdgpu_bo_create_list_entry_array(data, &info);
+ if (r)
+ return r;
+ r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+ &p->bo_list);
+ if (r)
+ goto error_free;
+
+ kvfree(info);
return 0;
+
+error_free:
+ if (info)
+ kvfree(info);
+
+ return r;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
@@ -163,6 +197,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
+ case AMDGPU_CHUNK_ID_BO_HANDLES:
+ size = sizeof(struct drm_amdgpu_bo_list_in);
+ if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+ ret = -EINVAL;
+ goto free_partial_kdata;
+ }
+
+ ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+ if (ret)
+ goto free_partial_kdata;
+
+ break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -183,9 +230,13 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto free_all_kdata;
}
- if (p->uf_entry.robj)
+ if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
+
+ /* Use this opportunity to fill in task info for the vm */
+ amdgpu_vm_set_task_info(vm);
+
return 0;
free_all_kdata:
@@ -257,7 +308,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
return;
}
- total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
+ total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
@@ -302,7 +353,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
/* Do the same for visible VRAM if half of it is free */
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -359,7 +410,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
* to move it. Don't move anything if the threshold is zero.
*/
if (p->bytes_moved < p->bytes_moved_threshold) {
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
* visible VRAM if we've depleted our allowance to do
@@ -377,11 +428,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
}
retry:
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
amdgpu_bo_in_cpu_visible_vram(bo))
p->bytes_moved_vis += ctx.bytes_moved;
@@ -408,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
p->evictable = list_prev_entry(p->evictable, tv.head)) {
struct amdgpu_bo_list_entry *candidate = p->evictable;
- struct amdgpu_bo *bo = candidate->robj;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
bool update_bytes_moved_vis;
uint32_t other;
/* If we reached our current BO we can forget it */
- if (candidate->robj == validated)
+ if (bo == validated)
break;
/* We can't move pinned BOs here */
@@ -434,9 +485,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
update_bytes_moved_vis =
- adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
- amdgpu_bo_in_cpu_visible_vram(bo);
- amdgpu_ttm_placement_from_domain(bo, other);
+ !amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+ amdgpu_bo_in_cpu_visible_vram(bo);
+ amdgpu_bo_placement_from_domain(bo, other);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
if (update_bytes_moved_vis)
@@ -479,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
int r;
list_for_each_entry(lobj, validated, tv.head) {
- struct amdgpu_bo *bo = lobj->robj;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
bool binding_userptr = false;
struct mm_struct *usermm;
@@ -490,8 +541,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
/* Check if we have user pages and nobody bound the BO already */
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
lobj->user_pages) {
- amdgpu_ttm_placement_from_domain(bo,
- AMDGPU_GEM_DOMAIN_CPU);
+ amdgpu_bo_placement_from_domain(bo,
+ AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
@@ -519,32 +570,46 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
- unsigned i, tries = 10;
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
+ unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
- p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
- if (p->bo_list) {
- amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+ /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+ if (cs->in.bo_list_handle) {
+ if (p->bo_list)
+ return -EINVAL;
+
+ r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
+ &p->bo_list);
+ if (r)
+ return r;
+ } else if (!p->bo_list) {
+ /* Create an empty bo_list when no handle is provided */
+ r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
+ &p->bo_list);
+ if (r)
+ return r;
}
+ amdgpu_bo_list_get_list(p->bo_list, &p->validated);
+ if (p->bo_list->first_userptr != p->bo_list->num_entries)
+ p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
- if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+ if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
while (1) {
struct list_head need_pages;
- unsigned i;
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates);
@@ -554,17 +619,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_free_pages;
}
- /* Without a BO list we don't have userptr BOs */
- if (!p->bo_list)
- break;
-
INIT_LIST_HEAD(&need_pages);
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- struct amdgpu_bo *bo;
-
- e = &p->bo_list->array[i];
- bo = e->robj;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) {
@@ -583,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
list_del(&e->tv.head);
list_add(&e->tv.head, &need_pages);
- amdgpu_bo_unreserve(e->robj);
+ amdgpu_bo_unreserve(bo);
}
}
@@ -602,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
/* Fill the page arrays for all userptrs. */
list_for_each_entry(e, &need_pages, tv.head) {
- struct ttm_tt *ttm = e->robj->tbo.ttm;
+ struct ttm_tt *ttm = e->tv.bo->ttm;
e->user_pages = kvmalloc_array(ttm->num_pages,
sizeof(struct page*),
@@ -656,39 +713,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
- if (p->bo_list) {
- struct amdgpu_vm *vm = &fpriv->vm;
- unsigned i;
-
- gds = p->bo_list->gds_obj;
- gws = p->bo_list->gws_obj;
- oa = p->bo_list->oa_obj;
- for (i = 0; i < p->bo_list->num_entries; i++) {
- struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+ gds = p->bo_list->gds_obj;
+ gws = p->bo_list->gws_obj;
+ oa = p->bo_list->oa_obj;
- p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
- }
- } else {
- gds = p->adev->gds.gds_gfx_bo;
- gws = p->adev->gds.gws_gfx_bo;
- oa = p->adev->gds.oa_gfx_bo;
- }
+ amdgpu_bo_list_for_each_entry(e, p->bo_list)
+ e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
if (gds) {
- p->job->gds_base = amdgpu_bo_gpu_offset(gds);
- p->job->gds_size = amdgpu_bo_size(gds);
+ p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+ p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
}
if (gws) {
- p->job->gws_base = amdgpu_bo_gpu_offset(gws);
- p->job->gws_size = amdgpu_bo_size(gws);
+ p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+ p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
}
if (oa) {
- p->job->oa_base = amdgpu_bo_gpu_offset(oa);
- p->job->oa_size = amdgpu_bo_size(oa);
+ p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+ p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
}
- if (!r && p->uf_entry.robj) {
- struct amdgpu_bo *uf = p->uf_entry.robj;
+ if (!r && p->uf_entry.tv.bo) {
+ struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
r = amdgpu_ttm_alloc_gart(&uf->tbo);
p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -700,18 +746,12 @@ error_validate:
error_free_pages:
- if (p->bo_list) {
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- e = &p->bo_list->array[i];
-
- if (!e->user_pages)
- continue;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ if (!e->user_pages)
+ continue;
- release_pages(e->user_pages,
- e->robj->tbo.ttm->num_pages);
- kvfree(e->user_pages);
- }
+ release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
+ kvfree(e->user_pages);
}
return r;
@@ -723,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
int r;
list_for_each_entry(e, &p->validated, tv.head) {
- struct reservation_object *resv = e->robj->tbo.resv;
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ struct reservation_object *resv = bo->tbo.resv;
+
r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
- amdgpu_bo_explicit_sync(e->robj));
+ amdgpu_bo_explicit_sync(bo));
if (r)
return r;
@@ -768,17 +810,88 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
kfree(parser->chunks);
if (parser->job)
amdgpu_job_free(parser->job);
- amdgpu_bo_unref(&parser->uf_entry.robj);
+ if (parser->uf_entry.tv.bo) {
+ struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+ amdgpu_bo_unref(&uf);
+ }
}
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
- struct amdgpu_device *adev = p->adev;
+ struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_list_entry *e;
struct amdgpu_bo_va *bo_va;
struct amdgpu_bo *bo;
- int i, r;
+ int r;
+
+ /* Only for UVD/VCE VM emulation */
+ if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
+ unsigned i, j;
+
+ for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj = NULL;
+ struct amdgpu_cs_chunk *chunk;
+ uint64_t offset, va_start;
+ struct amdgpu_ib *ib;
+ uint8_t *kptr;
+
+ chunk = &p->chunks[i];
+ ib = &p->job->ibs[j];
+ chunk_ib = chunk->kdata;
+
+ if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+ continue;
+
+ va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+ if (r) {
+ DRM_ERROR("IB va_start is invalid\n");
+ return r;
+ }
+
+ if ((va_start + chunk_ib->ib_bytes) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
+
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r) {
+ return r;
+ }
+
+ offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+ kptr += va_start - offset;
+
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+ amdgpu_bo_kunmap(aobj);
+
+ r = amdgpu_ring_parse_cs(ring, p, j);
+ if (r)
+ return r;
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
+ }
+
+ j++;
+ }
+ }
+
+ if (!p->job->vm)
+ return amdgpu_cs_sync_rings(p);
+
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
@@ -808,29 +921,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
return r;
}
- if (p->bo_list) {
- for (i = 0; i < p->bo_list->num_entries; i++) {
- struct dma_fence *f;
-
- /* ignore duplicates */
- bo = p->bo_list->array[i].robj;
- if (!bo)
- continue;
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct dma_fence *f;
- bo_va = p->bo_list->array[i].bo_va;
- if (bo_va == NULL)
- continue;
+ /* ignore duplicates */
+ bo = ttm_to_amdgpu_bo(e->tv.bo);
+ if (!bo)
+ continue;
- r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r)
- return r;
+ bo_va = e->bo_va;
+ if (bo_va == NULL)
+ continue;
- f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
- if (r)
- return r;
- }
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+ f = bo_va->last_pt_update;
+ r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
+ if (r)
+ return r;
}
r = amdgpu_vm_handle_moved(adev, vm);
@@ -845,11 +955,18 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
if (r)
return r;
- if (amdgpu_vm_debug && p->bo_list) {
+ r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+ if (r)
+ return r;
+
+ p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
+
+ if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */
- for (i = 0; i < p->bo_list->num_entries; i++) {
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
/* ignore duplicates */
- bo = p->bo_list->array[i].robj;
if (!bo)
continue;
@@ -857,82 +974,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
}
}
- return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
-{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_ring *ring = p->job->ring;
- int r;
-
- /* Only for UVD/VCE VM emulation */
- if (p->job->ring->funcs->parse_cs) {
- unsigned i, j;
-
- for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_bo_va_mapping *m;
- struct amdgpu_bo *aobj = NULL;
- struct amdgpu_cs_chunk *chunk;
- uint64_t offset, va_start;
- struct amdgpu_ib *ib;
- uint8_t *kptr;
-
- chunk = &p->chunks[i];
- ib = &p->job->ibs[j];
- chunk_ib = chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
- r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
- if (r) {
- DRM_ERROR("IB va_start is invalid\n");
- return r;
- }
-
- if ((va_start + chunk_ib->ib_bytes) >
- (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("IB va_start+ib_bytes is invalid\n");
- return -EINVAL;
- }
-
- /* the IB should be reserved at this point */
- r = amdgpu_bo_kmap(aobj, (void **)&kptr);
- if (r) {
- return r;
- }
-
- offset = m->start * AMDGPU_GPU_PAGE_SIZE;
- kptr += va_start - offset;
-
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
-
- r = amdgpu_ring_parse_cs(ring, p, j);
- if (r)
- return r;
-
- j++;
- }
- }
-
- if (p->job->vm) {
- p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-
- r = amdgpu_bo_vm_update_pte(p);
- if (r)
- return r;
-
- r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
- if (r)
- return r;
- }
-
return amdgpu_cs_sync_rings(p);
}
@@ -941,14 +982,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
{
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- int i, j;
int r, ce_preempt = 0, de_preempt = 0;
+ struct amdgpu_ring *ring;
+ int i, j;
for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk;
struct amdgpu_ib *ib;
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_ring *ring;
+ struct drm_sched_entity *entity;
chunk = &parser->chunks[i];
ib = &parser->job->ibs[j];
@@ -970,27 +1012,24 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return -EINVAL;
}
- r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring, &ring);
+ r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance, chunk_ib->ring,
+ &entity);
if (r)
return r;
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
- parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
- if (!parser->ctx->preamble_presented) {
- parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
- parser->ctx->preamble_presented = true;
- }
- }
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ parser->job->preamble_status |=
+ AMDGPU_PREAMBLE_IB_PRESENT;
- if (parser->job->ring && parser->job->ring != ring)
+ if (parser->entity && parser->entity != entity)
return -EINVAL;
- parser->job->ring = ring;
+ parser->entity = entity;
- r = amdgpu_ib_get(adev, vm,
- ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
- ib);
+ ring = to_amdgpu_ring(entity->rq->sched);
+ r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -1004,12 +1043,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
}
/* UVD & VCE fw doesn't support user fences */
+ ring = to_amdgpu_ring(parser->entity->rq->sched);
if (parser->job->uf_addr && (
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
- parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+ ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
- return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
+ return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1025,24 +1065,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (i = 0; i < num_deps; ++i) {
- struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
+ struct drm_sched_entity *entity;
struct dma_fence *fence;
ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
if (ctx == NULL)
return -EINVAL;
- r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
- deps[i].ip_type,
- deps[i].ip_instance,
- deps[i].ring, &ring);
+ r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
- fence = amdgpu_ctx_get_fence(ctx, ring,
+ fence = amdgpu_ctx_get_fence(ctx, entity,
deps[i].handle);
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
@@ -1065,7 +1104,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
{
int r;
struct dma_fence *fence;
- r = drm_syncobj_find_fence(p->filp, handle, &fence);
+ r = drm_syncobj_find_fence(p->filp, handle, 0, &fence);
if (r)
return r;
@@ -1154,71 +1193,80 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
int i;
for (i = 0; i < p->num_post_dep_syncobjs; ++i)
- drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+ drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence);
}
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
- struct amdgpu_ring *ring = p->job->ring;
- struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_sched_entity *entity = p->entity;
+ enum drm_sched_priority priority;
+ struct amdgpu_ring *ring;
+ struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
- unsigned i;
uint64_t seq;
int r;
- amdgpu_mn_lock(p->mn);
- if (p->bo_list) {
- for (i = p->bo_list->first_userptr;
- i < p->bo_list->num_entries; ++i) {
- struct amdgpu_bo *bo = p->bo_list->array[i].robj;
-
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
- amdgpu_mn_unlock(p->mn);
- return -ERESTARTSYS;
- }
- }
- }
-
job = p->job;
p->job = NULL;
- r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
- if (r) {
- amdgpu_job_free(job);
- amdgpu_mn_unlock(p->mn);
- return r;
+ r = drm_sched_job_init(&job->base, entity, p->filp);
+ if (r)
+ goto error_unlock;
+
+ /* No memory allocation is allowed while holding the mn lock */
+ amdgpu_mn_lock(p->mn);
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
+ r = -ERESTARTSYS;
+ goto error_abort;
+ }
}
job->owner = p->filp;
- job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished);
- r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
- if (r) {
- dma_fence_put(p->fence);
- dma_fence_put(&job->base.s_fence->finished);
- amdgpu_job_free(job);
- amdgpu_mn_unlock(p->mn);
- return r;
- }
-
+ amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
amdgpu_cs_post_dependencies(p);
+ if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ !p->ctx->preamble_presented) {
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ p->ctx->preamble_presented = true;
+ }
+
cs->out.handle = seq;
job->uf_sequence = seq;
amdgpu_job_free_resources(job);
- amdgpu_ring_priority_get(job->ring, job->base.s_priority);
trace_amdgpu_cs_ioctl(job);
+ amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
+ priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+ ring = to_amdgpu_ring(entity->rq->sched);
+ amdgpu_ring_priority_get(ring, priority);
+
+ amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);
return 0;
+
+error_abort:
+ dma_fence_put(&job->base.s_fence->finished);
+ job->base.s_fence = NULL;
+ amdgpu_mn_unlock(p->mn);
+
+error_unlock:
+ amdgpu_job_free(job);
+ return r;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
@@ -1245,6 +1293,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r)
goto out;
+ r = amdgpu_cs_dependencies(adev, &parser);
+ if (r) {
+ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+ goto out;
+ }
+
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
if (r == -ENOMEM)
@@ -1256,16 +1310,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
reserved_buffers = true;
- r = amdgpu_cs_dependencies(adev, &parser);
- if (r) {
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- goto out;
- }
-
for (i = 0; i < parser.job->num_ibs; i++)
trace_amdgpu_cs(&parser, i);
- r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+ r = amdgpu_cs_vm_handling(&parser);
if (r)
goto out;
@@ -1289,9 +1337,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
union drm_amdgpu_wait_cs *wait = data;
- struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
- struct amdgpu_ring *ring = NULL;
+ struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
long r;
@@ -1300,15 +1347,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
if (ctx == NULL)
return -EINVAL;
- r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
- wait->in.ip_type, wait->in.ip_instance,
- wait->in.ring, &ring);
+ r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+ wait->in.ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return r;
}
- fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+ fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
if (IS_ERR(fence))
r = PTR_ERR(fence);
else if (fence) {
@@ -1340,7 +1386,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_fence *user)
{
- struct amdgpu_ring *ring;
+ struct drm_sched_entity *entity;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
int r;
@@ -1349,14 +1395,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
if (ctx == NULL)
return ERR_PTR(-EINVAL);
- r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
- user->ip_instance, user->ring, &ring);
+ r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+ user->ring, &entity);
if (r) {
amdgpu_ctx_put(ctx);
return ERR_PTR(r);
}
- fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+ fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
amdgpu_ctx_put(ctx);
return fence;
@@ -1605,7 +1651,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
+ amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c5bb36275e93..f9b54236102d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -27,6 +27,30 @@
#include "amdgpu.h"
#include "amdgpu_sched.h"
+#define to_amdgpu_ctx_entity(e) \
+ container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+ [AMDGPU_HW_IP_GFX] = 1,
+ [AMDGPU_HW_IP_COMPUTE] = 4,
+ [AMDGPU_HW_IP_DMA] = 2,
+ [AMDGPU_HW_IP_UVD] = 1,
+ [AMDGPU_HW_IP_VCE] = 1,
+ [AMDGPU_HW_IP_UVD_ENC] = 1,
+ [AMDGPU_HW_IP_VCN_DEC] = 1,
+ [AMDGPU_HW_IP_VCN_ENC] = 1,
+};
+
+static int amdgput_ctx_total_num_entities(void)
+{
+ unsigned i, num_entities = 0;
+
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+ num_entities += amdgpu_ctx_num_entities[i];
+
+ return num_entities;
+}
+
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
enum drm_sched_priority priority)
{
@@ -48,6 +72,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx)
{
+ unsigned num_entities = amdgput_ctx_total_num_entities();
unsigned i, j;
int r;
@@ -60,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
memset(ctx, 0, sizeof(*ctx));
ctx->adev = adev;
- kref_init(&ctx->refcount);
- spin_lock_init(&ctx->ring_lock);
- ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+
+ ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
sizeof(struct dma_fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
- mutex_init(&ctx->lock);
+ ctx->entities[0] = kcalloc(num_entities,
+ sizeof(struct amdgpu_ctx_entity),
+ GFP_KERNEL);
+ if (!ctx->entities[0]) {
+ r = -ENOMEM;
+ goto error_free_fences;
+ }
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- ctx->rings[i].sequence = 1;
- ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
+ for (i = 0; i < num_entities; ++i) {
+ struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+
+ entity->sequence = 1;
+ entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
}
+ for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
+ ctx->entities[i] = ctx->entities[i - 1] +
+ amdgpu_ctx_num_entities[i - 1];
+
+ kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
@@ -80,32 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
ctx->init_priority = priority;
ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
- /* create context entity for each ring */
- for (i = 0; i < adev->num_rings; i++) {
- struct amdgpu_ring *ring = adev->rings[i];
- struct drm_sched_rq *rq;
-
- rq = &ring->sched.sched_rq[priority];
+ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+ struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+ struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+ unsigned num_rings;
+
+ switch (i) {
+ case AMDGPU_HW_IP_GFX:
+ rings[0] = &adev->gfx.gfx_ring[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ for (j = 0; j < adev->gfx.num_compute_rings; ++j)
+ rings[j] = &adev->gfx.compute_ring[j];
+ num_rings = adev->gfx.num_compute_rings;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ for (j = 0; j < adev->sdma.num_instances; ++j)
+ rings[j] = &adev->sdma.instance[j].ring;
+ num_rings = adev->sdma.num_instances;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ rings[0] = &adev->uvd.inst[0].ring;
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ rings[0] = &adev->vce.ring[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ rings[0] = &adev->uvd.inst[0].ring_enc[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ rings[0] = &adev->vcn.ring_dec;
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ rings[0] = &adev->vcn.ring_enc[0];
+ num_rings = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ rings[0] = &adev->vcn.ring_jpeg;
+ num_rings = 1;
+ break;
+ }
- if (ring == &adev->gfx.kiq.ring)
- continue;
+ for (j = 0; j < num_rings; ++j)
+ rqs[j] = &rings[j]->sched.sched_rq[priority];
- r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
- rq, &ctx->guilty);
+ for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+ r = drm_sched_entity_init(&ctx->entities[i][j].entity,
+ rqs, num_rings, &ctx->guilty);
if (r)
- goto failed;
+ goto error_cleanup_entities;
}
- r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
- if (r)
- goto failed;
-
return 0;
-failed:
- for (j = 0; j < i; j++)
- drm_sched_entity_fini(&adev->rings[j]->sched,
- &ctx->rings[j].entity);
+error_cleanup_entities:
+ for (i = 0; i < num_entities; ++i)
+ drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+ kfree(ctx->entities[0]);
+
+error_free_fences:
kfree(ctx->fences);
ctx->fences = NULL;
return r;
@@ -114,25 +191,47 @@ failed:
static void amdgpu_ctx_fini(struct kref *ref)
{
struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+ unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
if (!adev)
return;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+ for (i = 0; i < num_entities; ++i)
for (j = 0; j < amdgpu_sched_jobs; ++j)
- dma_fence_put(ctx->rings[i].fences[j]);
+ dma_fence_put(ctx->entities[0][i].fences[j]);
kfree(ctx->fences);
- ctx->fences = NULL;
-
- amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+ kfree(ctx->entities[0]);
mutex_destroy(&ctx->lock);
kfree(ctx);
}
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity)
+{
+ if (hw_ip >= AMDGPU_HW_IP_NUM) {
+ DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+ return -EINVAL;
+ }
+
+ /* Right now all IPs have only one instance - multiple rings. */
+ if (instance != 0) {
+ DRM_DEBUG("invalid ip instance: %d\n", instance);
+ return -EINVAL;
+ }
+
+ if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+ DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+ return -EINVAL;
+ }
+
+ *entity = &ctx->entities[hw_ip][ring].entity;
+ return 0;
+}
+
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
struct amdgpu_fpriv *fpriv,
struct drm_file *filp,
@@ -169,18 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
+ unsigned num_entities;
u32 i;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
- for (i = 0; i < ctx->adev->num_rings; i++) {
+ num_entities = 0;
+ for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
+ num_entities += amdgpu_ctx_num_entities[i];
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
- &ctx->rings[i].entity);
- }
+ for (i = 0; i < num_entities; i++)
+ drm_sched_entity_destroy(&ctx->entities[0][i].entity);
amdgpu_ctx_fini(ref);
}
@@ -336,56 +434,56 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
return 0;
}
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
- struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence, uint64_t* handle)
{
- struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
- uint64_t seq = cring->sequence;
- unsigned idx = 0;
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ uint64_t seq = centity->sequence;
struct dma_fence *other = NULL;
+ unsigned idx = 0;
idx = seq & (amdgpu_sched_jobs - 1);
- other = cring->fences[idx];
+ other = centity->fences[idx];
if (other)
BUG_ON(!dma_fence_is_signaled(other));
dma_fence_get(fence);
spin_lock(&ctx->ring_lock);
- cring->fences[idx] = fence;
- cring->sequence++;
+ centity->fences[idx] = fence;
+ centity->sequence++;
spin_unlock(&ctx->ring_lock);
dma_fence_put(other);
- if (handler)
- *handler = seq;
-
- return 0;
+ if (handle)
+ *handle = seq;
}
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
- struct amdgpu_ring *ring, uint64_t seq)
+ struct drm_sched_entity *entity,
+ uint64_t seq)
{
- struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
struct dma_fence *fence;
spin_lock(&ctx->ring_lock);
if (seq == ~0ull)
- seq = ctx->rings[ring->idx].sequence - 1;
+ seq = centity->sequence - 1;
- if (seq >= cring->sequence) {
+ if (seq >= centity->sequence) {
spin_unlock(&ctx->ring_lock);
return ERR_PTR(-EINVAL);
}
- if (seq + amdgpu_sched_jobs < cring->sequence) {
+ if (seq + amdgpu_sched_jobs < centity->sequence) {
spin_unlock(&ctx->ring_lock);
return NULL;
}
- fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+ fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
spin_unlock(&ctx->ring_lock);
return fence;
@@ -394,35 +492,28 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
enum drm_sched_priority priority)
{
- int i;
- struct amdgpu_device *adev = ctx->adev;
- struct drm_sched_rq *rq;
- struct drm_sched_entity *entity;
- struct amdgpu_ring *ring;
+ unsigned num_entities = amdgput_ctx_total_num_entities();
enum drm_sched_priority ctx_prio;
+ unsigned i;
ctx->override_priority = priority;
ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
ctx->init_priority : ctx->override_priority;
- for (i = 0; i < adev->num_rings; i++) {
- ring = adev->rings[i];
- entity = &ctx->rings[i].entity;
- rq = &ring->sched.sched_rq[ctx_prio];
+ for (i = 0; i < num_entities; i++) {
+ struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
- if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
- continue;
-
- drm_sched_entity_set_rq(entity, rq);
+ drm_sched_entity_set_priority(entity, ctx_prio);
}
}
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity)
{
- struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
- unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
- struct dma_fence *other = cring->fences[idx];
+ struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+ unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
+ struct dma_fence *other = centity->fences[idx];
if (other) {
signed long r;
@@ -444,35 +535,37 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
idr_init(&mgr->ctx_handles);
}
-void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
+ unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
+ long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
idp = &mgr->ctx_handles;
+ mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
- if (!ctx->adev)
+ if (!ctx->adev) {
+ mutex_unlock(&mgr->lock);
return;
+ }
- for (i = 0; i < ctx->adev->num_rings; i++) {
-
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
+ for (i = 0; i < num_entities; i++) {
+ struct drm_sched_entity *entity;
- if (kref_read(&ctx->refcount) == 1)
- drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
- &ctx->rings[i].entity);
- else
- DRM_ERROR("ctx %p is still alive\n", ctx);
+ entity = &ctx->entities[0][i].entity;
+ max_wait = drm_sched_entity_flush(entity, max_wait);
}
}
+ mutex_unlock(&mgr->lock);
}
-void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
+ unsigned num_entities = amdgput_ctx_total_num_entities();
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
@@ -484,17 +577,13 @@ void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
if (!ctx->adev)
return;
- for (i = 0; i < ctx->adev->num_rings; i++) {
-
- if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
- continue;
-
- if (kref_read(&ctx->refcount) == 1)
- drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
- &ctx->rings[i].entity);
- else
- DRM_ERROR("ctx %p is still alive\n", ctx);
+ if (kref_read(&ctx->refcount) != 1) {
+ DRM_ERROR("ctx %p is still alive\n", ctx);
+ continue;
}
+
+ for (i = 0; i < num_entities; i++)
+ drm_sched_entity_fini(&ctx->entities[0][i].entity);
}
}
@@ -504,7 +593,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
struct idr *idp;
uint32_t id;
- amdgpu_ctx_mgr_entity_cleanup(mgr);
+ amdgpu_ctx_mgr_entity_fini(mgr);
idp = &mgr->ctx_handles;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
new file mode 100644
index 000000000000..b3b012c0a7da
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_CTX_H__
+#define __AMDGPU_CTX_H__
+
+#include "amdgpu_ring.h"
+
+struct drm_device;
+struct drm_file;
+struct amdgpu_fpriv;
+
+struct amdgpu_ctx_entity {
+ uint64_t sequence;
+ struct dma_fence **fences;
+ struct drm_sched_entity entity;
+};
+
+struct amdgpu_ctx {
+ struct kref refcount;
+ struct amdgpu_device *adev;
+ unsigned reset_counter;
+ unsigned reset_counter_query;
+ uint32_t vram_lost_counter;
+ spinlock_t ring_lock;
+ struct dma_fence **fences;
+ struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM];
+ bool preamble_presented;
+ enum drm_sched_priority init_priority;
+ enum drm_sched_priority override_priority;
+ struct mutex lock;
+ atomic_t guilty;
+};
+
+struct amdgpu_ctx_mgr {
+ struct amdgpu_device *adev;
+ struct mutex lock;
+ /* protected by lock */
+ struct idr ctx_handles;
+};
+
+extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM];
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+ u32 ring, struct drm_sched_entity **entity);
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ struct dma_fence *fence, uint64_t *seq);
+struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity,
+ uint64_t seq);
+void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
+ enum drm_sched_priority priority);
+
+int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+ struct drm_sched_entity *entity);
+
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f5fb93795a69..dd9a4fb9ce39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -826,21 +826,13 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
struct drm_minor *minor = adev->ddev->primary;
struct dentry *ent, *root = minor->debugfs_root;
- unsigned i, j;
+ unsigned int i;
for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
ent = debugfs_create_file(debugfs_regs_names[i],
S_IFREG | S_IRUGO, root,
adev, debugfs_regs[i]);
- if (IS_ERR(ent)) {
- for (j = 0; j < i; j++) {
- debugfs_remove(adev->debugfs_regs[i]);
- adev->debugfs_regs[i] = NULL;
- }
- return PTR_ERR(ent);
- }
-
- if (!i)
+ if (!i && !IS_ERR_OR_NULL(ent))
i_size_write(ent->d_inode, adev->rmmio_size);
adev->debugfs_regs[i] = ent;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6e5284e6c028..1e4dd09a5072 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,6 +25,7 @@
* Alex Deucher
* Jerome Glisse
*/
+#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
@@ -61,6 +62,8 @@
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@@ -651,73 +654,6 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
}
/**
- * amdgpu_device_vram_location - try to find VRAM location
- *
- * @adev: amdgpu device structure holding all necessary informations
- * @mc: memory controller structure holding memory informations
- * @base: base address at which to put VRAM
- *
- * Function will try to place VRAM at base address provided
- * as parameter.
- */
-void amdgpu_device_vram_location(struct amdgpu_device *adev,
- struct amdgpu_gmc *mc, u64 base)
-{
- uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
-
- mc->vram_start = base;
- mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
- if (limit && limit < mc->real_vram_size)
- mc->real_vram_size = limit;
- dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
- mc->mc_vram_size >> 20, mc->vram_start,
- mc->vram_end, mc->real_vram_size >> 20);
-}
-
-/**
- * amdgpu_device_gart_location - try to find GTT location
- *
- * @adev: amdgpu device structure holding all necessary informations
- * @mc: memory controller structure holding memory informations
- *
- * Function will place try to place GTT before or after VRAM.
- *
- * If GTT size is bigger than space left then we ajust GTT size.
- * Thus function will never fails.
- *
- * FIXME: when reducing GTT size align new size on power of 2.
- */
-void amdgpu_device_gart_location(struct amdgpu_device *adev,
- struct amdgpu_gmc *mc)
-{
- u64 size_af, size_bf;
-
- mc->gart_size += adev->pm.smu_prv_buffer_size;
-
- size_af = adev->gmc.mc_mask - mc->vram_end;
- size_bf = mc->vram_start;
- if (size_bf > size_af) {
- if (mc->gart_size > size_bf) {
- dev_warn(adev->dev, "limiting GTT\n");
- mc->gart_size = size_bf;
- }
- mc->gart_start = 0;
- } else {
- if (mc->gart_size > size_af) {
- dev_warn(adev->dev, "limiting GTT\n");
- mc->gart_size = size_af;
- }
- /* VCE doesn't like it when BOs cross a 4GB segment, so align
- * the GART base on a 4GB boundary as well.
- */
- mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);
- }
- mc->gart_end = mc->gart_start + mc->gart_size - 1;
- dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
- mc->gart_size >> 20, mc->gart_start, mc->gart_end);
-}
-
-/**
* amdgpu_device_resize_fb_bar - try to resize FB BAR
*
* @adev: amdgpu_device pointer
@@ -1077,7 +1013,7 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
/**
* amdgpu_device_ip_set_clockgating_state - set the CG state
*
- * @adev: amdgpu_device pointer
+ * @dev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
* @state: clockgating state (gate or ungate)
*
@@ -1111,7 +1047,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
/**
* amdgpu_device_ip_set_powergating_state - set the PG state
*
- * @adev: amdgpu_device pointer
+ * @dev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
* @state: powergating state (gate or ungate)
*
@@ -1222,7 +1158,7 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
* amdgpu_device_ip_get_ip_block - get a hw IP pointer
*
* @adev: amdgpu_device pointer
- * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
*
* Returns a pointer to the hardware IP block structure
* if it exists for the asic, otherwise NULL.
@@ -1398,7 +1334,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
chip_name = "vega12";
break;
case CHIP_RAVEN:
- chip_name = "raven";
+ if (adev->rev_id >= 8)
+ chip_name = "raven2";
+ else if (adev->pdev->device == 0x15d8)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
break;
}
@@ -1552,6 +1493,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
}
adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
+ if (amdgpu_sriov_vf(adev))
+ adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@@ -1582,6 +1525,92 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return 0;
}
+static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+ if (adev->ip_blocks[i].status.hw)
+ continue;
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+ }
+
+ return 0;
+}
+
+static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (!adev->ip_blocks[i].status.sw)
+ continue;
+ if (adev->ip_blocks[i].status.hw)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+
+ return 0;
+}
+
+static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
+{
+ int r = 0;
+ int i;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ for (i = 0; i < adev->num_ip_blocks; i++) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
+ if (adev->in_gpu_reset || adev->in_suspend) {
+ if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset)
+ break; /* sriov gpu reset: psp needs to do hw_init before IH because of a hw limit */
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+ if (r) {
+ DRM_ERROR("resume of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ } else {
+ r = adev->ip_blocks[i].version->funcs->hw_init(adev);
+ if (r) {
+ DRM_ERROR("hw_init of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+ adev->ip_blocks[i].status.hw = true;
+ }
+ }
+ }
+
+ if (adev->powerplay.pp_funcs->load_firmware) {
+ r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
+ if (r) {
+ pr_err("firmware loading failed\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
/**
* amdgpu_device_ip_init - run init for hardware IPs
*
@@ -1638,20 +1667,23 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
}
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.sw)
- continue;
- if (adev->ip_blocks[i].status.hw)
- continue;
- r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
- if (r) {
- DRM_ERROR("hw_init of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
- adev->ip_blocks[i].status.hw = true;
- }
+ r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init is complete */
+ if (r)
+ return r;
+
+ r = amdgpu_device_ip_hw_init_phase1(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
+ r = amdgpu_device_ip_hw_init_phase2(adev);
+ if (r)
+ return r;
+ amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
if (amdgpu_sriov_vf(adev))
@@ -1691,29 +1723,28 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_ip_late_set_cg_state - late init for clockgating
+ * amdgpu_device_set_cg_state - set clockgating for amdgpu device
*
* @adev: amdgpu_device pointer
*
- * Late initialization pass enabling clockgating for hardware IPs.
* The list of all the hardware IPs that make up the asic is walked and the
- * set_clockgating_state callbacks are run. This stage is run late
- * in the init process.
+ * set_clockgating_state callbacks are run.
+ * The late initialization pass enables clockgating for hardware IPs.
+ * The fini or suspend pass disables clockgating for hardware IPs.
* Returns 0 on success, negative error code on failure.
*/
-static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
+
+static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
+ enum amd_clockgating_state state)
{
- int i = 0, r;
+ int i, j, r;
if (amdgpu_emu_mode == 1)
return 0;
- r = amdgpu_ib_ring_tests(adev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
-
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
+ for (j = 0; j < adev->num_ip_blocks; j++) {
+ i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+ if (!adev->ip_blocks[i].status.late_initialized)
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
@@ -1722,7 +1753,7 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
- AMD_CG_STATE_GATE);
+ state);
if (r) {
DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
@@ -1731,17 +1762,35 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
}
}
- if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) {
- /* enable gfx powergating */
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_PG_STATE_GATE);
- /* enable gfxoff */
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_SMC,
- AMD_PG_STATE_GATE);
- }
+ return 0;
+}
+
+static int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
+ enum amd_powergating_state state)
+{
+ int i, j, r;
+
+ if (amdgpu_emu_mode == 1)
+ return 0;
+
+ for (j = 0; j < adev->num_ip_blocks; j++) {
+ i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
+ if (!adev->ip_blocks[i].status.late_initialized)
+ continue;
+ /* skip PG for VCE/UVD, it's handled specially */
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+ adev->ip_blocks[i].version->funcs->set_powergating_state) {
+ /* enable powergating to save power */
+ r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
+ state);
+ if (r) {
+ DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
+ adev->ip_blocks[i].version->funcs->name, r);
+ return r;
+ }
+ }
+ }
return 0;
}
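Both helpers above walk the IP block list front to back when gating and back to front when ungating, so blocks are ungated in the reverse order they were gated. A minimal sketch of that index mapping, illustrative only and not part of this patch:

/* Illustrative sketch: map loop counter j to block index i so that
 * gating iterates forward and ungating iterates in reverse.
 */
static inline int ip_block_walk_index(int j, int num_ip_blocks, bool gate)
{
	return gate ? j : num_ip_blocks - j - 1;
}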
@@ -1762,7 +1811,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
int i = 0, r;
for (i = 0; i < adev->num_ip_blocks; i++) {
- if (!adev->ip_blocks[i].status.valid)
+ if (!adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->funcs->late_init) {
r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
@@ -1771,10 +1820,13 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
- adev->ip_blocks[i].status.late_initialized = true;
}
+ adev->ip_blocks[i].status.late_initialized = true;
}
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
+
queue_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -1799,20 +1851,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
int i, r;
amdgpu_amdkfd_device_fini(adev);
+
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
+
/* need to disable SMC first */
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
continue;
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
- adev->ip_blocks[i].version->funcs->set_clockgating_state) {
- /* ungate blocks before hw fini so that we can shutdown the blocks safely */
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
- AMD_CG_STATE_UNGATE);
- if (r) {
- DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@@ -1828,20 +1875,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.hw)
continue;
- if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
- adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
- adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
- adev->ip_blocks[i].version->funcs->set_clockgating_state) {
- /* ungate blocks before hw fini so that we can shutdown the blocks safely */
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
- AMD_CG_STATE_UNGATE);
- if (r) {
- DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
- adev->ip_blocks[i].version->funcs->name, r);
- return r;
- }
- }
-
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@@ -1858,6 +1891,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ amdgpu_ucode_free_bo(adev);
amdgpu_free_static_csa(adev);
amdgpu_device_wb_fini(adev);
amdgpu_device_vram_scratch_fini(adev);
@@ -1888,24 +1922,78 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
return 0;
}
+static int amdgpu_device_enable_mgpu_fan_boost(void)
+{
+ struct amdgpu_gpu_instance *gpu_ins;
+ struct amdgpu_device *adev;
+ int i, ret = 0;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ /*
+ * MGPU fan boost feature should be enabled
+ * only when there are two or more dGPUs in
+ * the system
+ */
+ if (mgpu_info.num_dgpu < 2)
+ goto out;
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ gpu_ins = &(mgpu_info.gpu_ins[i]);
+ adev = gpu_ins->adev;
+ if (!(adev->flags & AMD_IS_APU) &&
+ !gpu_ins->mgpu_fan_enabled &&
+ adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
+ ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
+ if (ret)
+ break;
+
+ gpu_ins->mgpu_fan_enabled = 1;
+ }
+ }
+
+out:
+ mutex_unlock(&mgpu_info.mutex);
+
+ return ret;
+}
+
/**
- * amdgpu_device_ip_late_init_func_handler - work handler for clockgating
+ * amdgpu_device_ip_late_init_func_handler - work handler for ib test
*
- * @work: work_struct
- *
- * Work handler for amdgpu_device_ip_late_set_cg_state. We put the
- * clockgating setup into a worker thread to speed up driver init and
- * resume from suspend.
+ * @work: work_struct.
*/
static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, late_init_work.work);
- amdgpu_device_ip_late_set_cg_state(adev);
+ int r;
+
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ DRM_ERROR("ib ring test failed (%d).\n", r);
+
+ r = amdgpu_device_enable_mgpu_fan_boost();
+ if (r)
+ DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+}
+
+static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
+{
+ struct amdgpu_device *adev =
+ container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+ if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
+ adev->gfx.gfx_off_state = true;
+ }
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
}
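The delayed work above only arms GFXOFF once gfx_off_req_count has dropped to zero. A hedged sketch of the request/release helper it pairs with follows; the in-tree counterpart lives in amdgpu_gfx.c, the body below is an approximation and the 100 ms delay is illustrative:

/* Approximate sketch of the request/release pattern: callers that need
 * the GFX block powered bump gfx_off_req_count, and the last release
 * schedules the delayed work that may re-enable GFXOFF.
 */
static void example_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
	mutex_lock(&adev->gfx.gfx_off_mutex);
	if (!enable) {
		adev->gfx.gfx_off_req_count++;
		if (adev->gfx.gfx_off_state &&
		    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
			adev->gfx.gfx_off_state = false;	/* leave GFXOFF immediately */
	} else if (adev->gfx.gfx_off_req_count > 0) {
		adev->gfx.gfx_off_req_count--;
		if (!adev->gfx.gfx_off_req_count)
			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
					      msecs_to_jiffies(100));
	}
	mutex_unlock(&adev->gfx.gfx_off_mutex);
}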
/**
- * amdgpu_device_ip_suspend - run suspend for hardware IPs
+ * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
*
* @adev: amdgpu_device pointer
*
@@ -1915,39 +2003,52 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
-int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
int i, r;
- if (amdgpu_sriov_vf(adev))
- amdgpu_virt_request_full_gpu(adev, false);
-
- /* ungate SMC block powergating */
- if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_SMC,
- AMD_CG_STATE_UNGATE);
-
- /* ungate SMC block first */
- r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
- AMD_CG_STATE_UNGATE);
- if (r) {
- DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
- }
+ amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+ amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
- /* ungate blocks so that suspend can properly shut them down */
- if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
- adev->ip_blocks[i].version->funcs->set_clockgating_state) {
- r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
- AMD_CG_STATE_UNGATE);
+ /* displays are handled separately */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
+ r = adev->ip_blocks[i].version->funcs->suspend(adev);
+ /* XXX handle errors */
if (r) {
- DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
+ DRM_ERROR("suspend of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
}
+ }
+
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Second phase of the suspend path. The list of all the hardware IPs
+ * that make up the asic is walked and the suspend callbacks are run for
+ * every IP except the displays, which are handled in phase 1. suspend
+ * puts the hardware and software state in each IP into a state suitable
+ * for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
+{
+ int i, r;
+
+ for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+ if (!adev->ip_blocks[i].status.valid)
+ continue;
+ /* displays are handled in phase1 */
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
+ continue;
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
@@ -1957,10 +2058,36 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
}
}
+ return 0;
+}
+
+/**
+ * amdgpu_device_ip_suspend - run suspend for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
+int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_request_full_gpu(adev, false);
+
+ r = amdgpu_device_ip_suspend_phase1(adev);
+ if (r)
+ return r;
+ r = amdgpu_device_ip_suspend_phase2(adev);
+
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
- return 0;
+ return r;
}
static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
@@ -1970,6 +2097,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_GMC,
AMD_IP_BLOCK_TYPE_COMMON,
+ AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_IH,
};
@@ -1985,7 +2113,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
continue;
r = block->version->funcs->hw_init(adev);
- DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+ DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
}
@@ -2000,7 +2128,6 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
static enum amd_ip_block_type ip_order[] = {
AMD_IP_BLOCK_TYPE_SMC,
- AMD_IP_BLOCK_TYPE_PSP,
AMD_IP_BLOCK_TYPE_DCE,
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
@@ -2020,7 +2147,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
continue;
r = block->version->funcs->hw_init(adev);
- DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+ DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
}
@@ -2085,7 +2212,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
continue;
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
@@ -2117,6 +2245,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
r = amdgpu_device_ip_resume_phase1(adev);
if (r)
return r;
+
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
r = amdgpu_device_ip_resume_phase2(adev);
return r;
@@ -2210,7 +2343,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
* amdgpu_device_init - initialize the driver
*
* @adev: amdgpu_device pointer
- * @pdev: drm dev pointer
+ * @ddev: drm dev pointer
* @pdev: pci dev pointer
* @flags: driver flags
*
@@ -2242,7 +2375,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
- adev->vm_manager.vm_pte_num_rings = 0;
+ adev->vm_manager.vm_pte_num_rqs = 0;
adev->gmc.gmc_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -2274,6 +2407,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
mutex_init(&adev->gfx.pipe_reserve_mutex);
+ mutex_init(&adev->gfx.gfx_off_mutex);
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
@@ -2300,6 +2434,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler);
+ INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
+ amdgpu_device_delay_enable_gfx_off);
+
+ adev->gfx.gfx_off_req_count = 1;
+ adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
/* Registers mapping */
/* TODO: block userspace mapping of io register */
@@ -2581,8 +2720,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
/**
* amdgpu_device_suspend - initiate device suspend
*
- * @pdev: drm dev pointer
- * @state: suspend state
+ * @dev: drm dev pointer
+ * @suspend: suspend state
+ * @fbcon: notify the fbdev of suspend
*
* Puts the hw in the suspend state (all asics).
* Returns 0 for success or an error on failure.
@@ -2604,8 +2744,14 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
+ adev->in_suspend = true;
drm_kms_helper_poll_disable(dev);
+ if (fbcon)
+ amdgpu_fbdev_set_suspend(adev, 1);
+
+ cancel_delayed_work_sync(&adev->late_init_work);
+
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
drm_modeset_lock_all(dev);
@@ -2613,44 +2759,46 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
}
drm_modeset_unlock_all(dev);
- }
-
- amdgpu_amdkfd_suspend(adev);
-
- /* unpin the front buffers and cursors */
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- struct drm_framebuffer *fb = crtc->primary->fb;
- struct amdgpu_bo *robj;
-
- if (amdgpu_crtc->cursor_bo) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- r = amdgpu_bo_reserve(aobj, true);
- if (r == 0) {
- amdgpu_bo_unpin(aobj);
- amdgpu_bo_unreserve(aobj);
+ /* unpin the front buffers and cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+ struct drm_framebuffer *fb = crtc->primary->fb;
+ struct amdgpu_bo *robj;
+
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
}
- }
- if (fb == NULL || fb->obj[0] == NULL) {
- continue;
- }
- robj = gem_to_amdgpu_bo(fb->obj[0]);
- /* don't unpin kernel fb objects */
- if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
- r = amdgpu_bo_reserve(robj, true);
- if (r == 0) {
- amdgpu_bo_unpin(robj);
- amdgpu_bo_unreserve(robj);
+ if (fb == NULL || fb->obj[0] == NULL) {
+ continue;
+ }
+ robj = gem_to_amdgpu_bo(fb->obj[0]);
+ /* don't unpin kernel fb objects */
+ if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
+ r = amdgpu_bo_reserve(robj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(robj);
+ amdgpu_bo_unreserve(robj);
+ }
}
}
}
+
+ amdgpu_amdkfd_suspend(adev);
+
+ r = amdgpu_device_ip_suspend_phase1(adev);
+
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
amdgpu_fence_driver_suspend(adev);
- r = amdgpu_device_ip_suspend(adev);
+ r = amdgpu_device_ip_suspend_phase2(adev);
/* evict remaining vram memory
* This second call to evict vram is to evict the gart page table
@@ -2669,18 +2817,15 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
DRM_ERROR("amdgpu asic reset failed\n");
}
- if (fbcon) {
- console_lock();
- amdgpu_fbdev_set_suspend(adev, 1);
- console_unlock();
- }
return 0;
}
/**
* amdgpu_device_resume - initiate device resume
*
- * @pdev: drm dev pointer
+ * @dev: drm dev pointer
+ * @resume: resume state
+ * @fbcon: notify the fbdev of resume
*
* Bring the hw back to operating state (all asics).
* Returns 0 for success or an error on failure.
@@ -2696,15 +2841,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
- if (fbcon)
- console_lock();
-
if (resume) {
pci_set_power_state(dev->pdev, PCI_D0);
pci_restore_state(dev->pdev);
r = pci_enable_device(dev->pdev);
if (r)
- goto unlock;
+ return r;
}
/* post card */
@@ -2717,29 +2859,30 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
r = amdgpu_device_ip_resume(adev);
if (r) {
DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
- goto unlock;
+ return r;
}
amdgpu_fence_driver_resume(adev);
r = amdgpu_device_ip_late_init(adev);
if (r)
- goto unlock;
-
- /* pin cursors */
- list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
- struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
- if (amdgpu_crtc->cursor_bo) {
- struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
- r = amdgpu_bo_reserve(aobj, true);
- if (r == 0) {
- r = amdgpu_bo_pin(aobj,
- AMDGPU_GEM_DOMAIN_VRAM,
- &amdgpu_crtc->cursor_addr);
- if (r != 0)
- DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
- amdgpu_bo_unreserve(aobj);
+ return r;
+
+ if (!amdgpu_device_has_dc_support(adev)) {
+ /* pin cursors */
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+ if (amdgpu_crtc->cursor_bo) {
+ struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+ r = amdgpu_bo_reserve(aobj, true);
+ if (r == 0) {
+ r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+ if (r != 0)
+ DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+ amdgpu_bo_unreserve(aobj);
+ }
}
}
}
@@ -2747,6 +2890,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (r)
return r;
+ /* Make sure IB tests flushed */
+ flush_delayed_work(&adev->late_init_work);
+
/* blat the mode back in */
if (fbcon) {
if (!amdgpu_device_has_dc_support(adev)) {
@@ -2760,6 +2906,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
}
drm_modeset_unlock_all(dev);
}
+ amdgpu_fbdev_set_suspend(adev, 0);
}
drm_kms_helper_poll_enable(dev);
@@ -2783,15 +2930,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
#ifdef CONFIG_PM
dev->dev->power.disable_depth--;
#endif
+ adev->in_suspend = false;
- if (fbcon)
- amdgpu_fbdev_set_suspend(adev, 0);
-
-unlock:
- if (fbcon)
- console_unlock();
-
- return r;
+ return 0;
}
/**
@@ -2949,117 +3090,64 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
}
/**
- * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring for the engine handling the buffer operations
- * @bo: amdgpu_bo buffer whose shadow is being restored
- * @fence: dma_fence associated with the operation
- *
- * Restores the VRAM buffer contents from the shadow in GTT. Used to
- * restore things like GPUVM page tables after a GPU reset where
- * the contents of VRAM might be lost.
- * Returns 0 on success, negative error code on failure.
- */
-static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_bo *bo,
- struct dma_fence **fence)
-{
- uint32_t domain;
- int r;
-
- if (!bo->shadow)
- return 0;
-
- r = amdgpu_bo_reserve(bo, true);
- if (r)
- return r;
- domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
- /* if bo has been evicted, then no need to recover */
- if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
- r = amdgpu_bo_validate(bo->shadow);
- if (r) {
- DRM_ERROR("bo validate failed!\n");
- goto err;
- }
-
- r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
- NULL, fence, true);
- if (r) {
- DRM_ERROR("recover page table failed!\n");
- goto err;
- }
- }
-err:
- amdgpu_bo_unreserve(bo);
- return r;
-}
-
-/**
- * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
+ * amdgpu_device_recover_vram - Recover some VRAM contents
*
* @adev: amdgpu_device pointer
*
* Restores the contents of VRAM buffers from the shadows in GTT. Used to
* restore things like GPUVM page tables after a GPU reset where
* the contents of VRAM might be lost.
- * Returns 0 on success, 1 on failure.
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
*/
-static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
+static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct amdgpu_bo *bo, *tmp;
struct dma_fence *fence = NULL, *next = NULL;
- long r = 1;
- int i = 0;
- long tmo;
+ struct amdgpu_bo *shadow;
+ long r = 1, tmo;
if (amdgpu_sriov_runtime(adev))
- tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
+ tmo = msecs_to_jiffies(8000);
else
tmo = msecs_to_jiffies(100);
DRM_INFO("recover vram bo from shadow start\n");
mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
- next = NULL;
- amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
+ list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
+
+ /* No need to recover an evicted BO */
+ if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
+ shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
+ continue;
+
+ r = amdgpu_bo_restore_shadow(shadow, &next);
+ if (r)
+ break;
+
if (fence) {
r = dma_fence_wait_timeout(fence, false, tmo);
- if (r == 0)
- pr_err("wait fence %p[%d] timeout\n", fence, i);
- else if (r < 0)
- pr_err("wait fence %p[%d] interrupted\n", fence, i);
- if (r < 1) {
- dma_fence_put(fence);
- fence = next;
+ dma_fence_put(fence);
+ fence = next;
+ if (r <= 0)
break;
- }
- i++;
+ } else {
+ fence = next;
}
-
- dma_fence_put(fence);
- fence = next;
}
mutex_unlock(&adev->shadow_list_lock);
- if (fence) {
- r = dma_fence_wait_timeout(fence, false, tmo);
- if (r == 0)
- pr_err("wait fence %p[%d] timeout\n", fence, i);
- else if (r < 0)
- pr_err("wait fence %p[%d] interrupted\n", fence, i);
-
- }
+ if (fence)
+ tmo = dma_fence_wait_timeout(fence, false, tmo);
dma_fence_put(fence);
- if (r > 0)
- DRM_INFO("recover vram bo from shadow done\n");
- else
+ if (r <= 0 || tmo <= 0) {
DRM_ERROR("recover vram bo from shadow failed\n");
+ return -EIO;
+ }
- return (r > 0) ? 0 : 1;
+ DRM_INFO("recover vram bo from shadow done\n");
+ return 0;
}
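The final check relies on the dma_fence_wait_timeout() return convention: a positive value is the remaining timeout in jiffies, 0 means the wait timed out and a negative value is an error, which is why both r <= 0 and tmo <= 0 abort the recovery. A small illustrative helper, not part of this patch:

/* Illustrative only: interpret dma_fence_wait_timeout() the same way the
 * loop above does -- anything <= 0 is treated as a failed recovery.
 */
static bool example_shadow_fence_done(struct dma_fence *fence, long tmo)
{
	long r = dma_fence_wait_timeout(fence, false, tmo);

	return r > 0; /* >0: signaled in time, 0: timeout, <0: interrupted/error */
}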
/**
@@ -3068,7 +3156,7 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
* @adev: amdgpu device pointer
*
* attempt to do soft-reset or full-reset and reinitialize Asic
- * return 0 means successed otherwise failed
+ * return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset(struct amdgpu_device *adev)
{
@@ -3112,6 +3200,10 @@ retry:
if (r)
goto out;
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
r = amdgpu_device_ip_resume_phase2(adev);
if (r)
goto out;
@@ -3133,8 +3225,8 @@ out:
}
}
- if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
- r = amdgpu_device_handle_vram_lost(adev);
+ if (!r)
+ r = amdgpu_device_recover_vram(adev);
return r;
}
@@ -3143,9 +3235,10 @@ out:
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu device pointer
+ * @from_hypervisor: request from hypervisor
*
* do VF FLR and reinitialize Asic
- * return 0 means successed otherwise failed
+ * return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
bool from_hypervisor)
@@ -3167,6 +3260,10 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* we need recover gart prior to run SMC/CP/SDMA resume */
amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
+ r = amdgpu_device_fw_loading(adev);
+ if (r)
+ return r;
+
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
if (r)
@@ -3179,44 +3276,59 @@ error:
amdgpu_virt_release_full_gpu(adev, true);
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
atomic_inc(&adev->vram_lost_counter);
- r = amdgpu_device_handle_vram_lost(adev);
+ r = amdgpu_device_recover_vram(adev);
}
return r;
}
/**
+ * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
+ * a hung GPU.
+ */
+bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
+{
+ if (!amdgpu_device_ip_check_soft_reset(adev)) {
+ DRM_INFO("Timeout, but no hardware hang detected.\n");
+ return false;
+ }
+
+ if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 &&
+ !amdgpu_sriov_vf(adev))) {
+ DRM_INFO("GPU recovery disabled.\n");
+ return false;
+ }
+
+ return true;
+}
+
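A hedged sketch of how a caller such as a job timeout handler is expected to combine the new helper with amdgpu_device_gpu_recover(); the function name below is hypothetical and the real call sites live elsewhere in the series:

/* Hypothetical caller: only kick off a full reset when recovery is
 * actually enabled for this device.
 */
static void example_job_timedout(struct amdgpu_device *adev, struct amdgpu_job *job)
{
	if (amdgpu_device_should_recover_gpu(adev))
		amdgpu_device_gpu_recover(adev, job);
}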
+/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
- * @force forces reset regardless of amdgpu_gpu_recovery
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
- struct amdgpu_job *job, bool force)
+ struct amdgpu_job *job)
{
int i, r, resched;
- if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
- DRM_INFO("No hardware hang detected. Did some blocks stall?\n");
- return 0;
- }
-
- if (!force && (amdgpu_gpu_recovery == 0 ||
- (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
- DRM_INFO("GPU recovery disabled.\n");
- return 0;
- }
-
dev_info(adev->dev, "GPU reset begin!\n");
mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1;
+ /* Block kfd */
+ amdgpu_amdkfd_pre_reset(adev);
+
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
@@ -3229,10 +3341,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
kthread_park(ring->sched.thread);
- if (job && job->ring->idx != i)
+ if (job && job->base.sched == &ring->sched)
continue;
- drm_sched_hw_job_reset(&ring->sched, &job->base);
+ drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
@@ -3253,7 +3365,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* or all rings (in the case @job is NULL)
* after above amdgpu_reset accomplished
*/
- if ((!job || job->ring->idx == i) && !r)
+ if ((!job || job->base.sched == &ring->sched) && !r)
drm_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
@@ -3270,9 +3382,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
} else {
- dev_info(adev->dev, "GPU reset(%d) successed!\n",atomic_read(&adev->gpu_reset_counter));
+ dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
}
+ /* unlock kfd */
+ amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev);
adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset);
@@ -3290,8 +3404,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*/
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
- u32 mask;
- int ret;
+ struct pci_dev *pdev;
+ enum pci_bus_speed speed_cap;
+ enum pcie_link_width link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@@ -3309,27 +3424,61 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
if (adev->pm.pcie_gen_mask == 0) {
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (!ret) {
- adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ /* asic caps */
+ pdev = adev->pdev;
+ speed_cap = pcie_get_speed_cap(pdev);
+ if (speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
-
- if (mask & DRM_PCIE_SPEED_25)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
- if (mask & DRM_PCIE_SPEED_50)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
- if (mask & DRM_PCIE_SPEED_80)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
} else {
- adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+ if (speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
+ }
+ /* platform caps */
+ pdev = adev->ddev->pdev->bus->self;
+ speed_cap = pcie_get_speed_cap(pdev);
+ if (speed_cap == PCI_SPEED_UNKNOWN) {
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ } else {
+ if (speed_cap == PCIE_SPEED_16_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
+ else if (speed_cap == PCIE_SPEED_8_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
+ else if (speed_cap == PCIE_SPEED_5_0GT)
+ adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
+ else
+ adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
+
}
}
if (adev->pm.pcie_mlw_mask == 0) {
- ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
- if (!ret) {
- switch (mask) {
- case 32:
+ pdev = adev->ddev->pdev->bus->self;
+ link_width = pcie_get_width_cap(pdev);
+ if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
+ adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
+ } else {
+ switch (link_width) {
+ case PCIE_LNK_X32:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
@@ -3338,7 +3487,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 16:
+ case PCIE_LNK_X16:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
@@ -3346,36 +3495,34 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 12:
+ case PCIE_LNK_X12:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 8:
+ case PCIE_LNK_X8:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 4:
+ case PCIE_LNK_X4:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 2:
+ case PCIE_LNK_X2:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
- case 1:
+ case PCIE_LNK_X1:
adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
break;
default:
break;
}
- } else {
- adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
}
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 76ee8e04ff11..6748cd7fc129 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -157,7 +157,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
struct amdgpu_bo *new_abo;
unsigned long flags;
u64 tiling_flags;
- u64 base;
int i, r;
work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -189,12 +188,18 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto cleanup;
}
- r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base);
+ r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
if (unlikely(r != 0)) {
DRM_ERROR("failed to pin new abo buffer before flip\n");
goto unreserve;
}
+ r = amdgpu_ttm_alloc_gart(&new_abo->tbo);
+ if (unlikely(r != 0)) {
+ DRM_ERROR("%p bind failed\n", new_abo);
+ goto unpin;
+ }
+
r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
&work->shared_count,
&work->shared);
@@ -206,7 +211,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);
amdgpu_bo_unreserve(new_abo);
- work->base = base;
+ work->base = amdgpu_bo_gpu_offset(new_abo);
work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index f66e3e3fef0a..06b922fe0d42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,6 +23,21 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
+#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
+#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
+#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
+#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
+#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h))
+#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev))
+#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev))
+#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async))
+#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
+#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
+#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
+
+int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+void amdgpu_display_update_priority(struct amdgpu_device *adev);
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
struct drm_framebuffer *
amdgpu_display_user_framebuffer_create(struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 77ad59ade85c..1c4595562f8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -28,6 +28,7 @@
#include "amdgpu_i2c.h"
#include "amdgpu_dpm.h"
#include "atom.h"
+#include "amd_pcie.h"
void amdgpu_dpm_print_class_info(u32 class, u32 class2)
{
@@ -936,9 +937,11 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
case AMDGPU_PCIE_GEN3:
return AMDGPU_PCIE_GEN3;
default:
- if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
+ if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
+ (default_gen == AMDGPU_PCIE_GEN3))
return AMDGPU_PCIE_GEN3;
- else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
+ else if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) &&
+ (default_gen == AMDGPU_PCIE_GEN2))
return AMDGPU_PCIE_GEN2;
else
return AMDGPU_PCIE_GEN1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index dd6203a0a6b7..f972cd156795 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -278,6 +278,9 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_get_fan_speed_rpm(adev, s) \
((adev)->powerplay.pp_funcs->get_fan_speed_rpm)((adev)->powerplay.pp_handle, (s))
+#define amdgpu_dpm_set_fan_speed_rpm(adev, s) \
+ ((adev)->powerplay.pp_funcs->set_fan_speed_rpm)((adev)->powerplay.pp_handle, (s))
+
#define amdgpu_dpm_get_sclk(adev, l) \
((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)))
@@ -287,12 +290,6 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_force_performance_level(adev, l) \
((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)))
-#define amdgpu_dpm_powergate_uvd(adev, g) \
- ((adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)))
-
-#define amdgpu_dpm_powergate_vce(adev, g) \
- ((adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)))
-
#define amdgpu_dpm_get_current_power_state(adev) \
((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
@@ -347,6 +344,10 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
+#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \
+ ((adev)->powerplay.pp_funcs->set_powergating_by_smu(\
+ (adev)->powerplay.pp_handle, block_type, gate))
+
#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
((adev)->powerplay.pp_funcs->get_power_profile_mode(\
(adev)->powerplay.pp_handle, buf))
@@ -359,9 +360,9 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
(adev)->powerplay.pp_handle, type, parameter, size))
-#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \
- ((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \
- (adev)->powerplay.pp_handle))
+#define amdgpu_dpm_enable_mgpu_fan_boost(adev) \
+ ((adev)->powerplay.pp_funcs->enable_mgpu_fan_boost(\
+ (adev)->powerplay.pp_handle))
struct amdgpu_dpm {
struct amdgpu_ps *ps;
@@ -402,7 +403,6 @@ struct amdgpu_dpm {
u32 tdp_adjustment;
u16 load_line_slope;
bool power_control;
- bool ac_power;
/* special states active */
bool thermal_active;
bool uvd_active;
@@ -439,6 +439,7 @@ struct amdgpu_pm {
struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
uint32_t smu_prv_buffer_size;
struct amdgpu_bo *smu_prv_buffer;
+ bool ac_power;
};
#define R600_SSTU_DFLT 0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b0bf2f24da48..28781414d71c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1,10 +1,3 @@
-/**
- * \file amdgpu_drv.c
- * AMD Amdgpu driver
- *
- * \author Gareth Hughes <[email protected]>
- */
-
/*
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
* All Rights Reserved.
@@ -43,6 +36,7 @@
#include "amdgpu.h"
#include "amdgpu_irq.h"
+#include "amdgpu_gem.h"
#include "amdgpu_amdkfd.h"
@@ -76,9 +70,10 @@
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 26
+#define KMS_DRIVER_MINOR 27
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -110,11 +105,8 @@ int amdgpu_vram_page_split = 512;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support = 0;
int amdgpu_dc = -1;
-int amdgpu_dc_log = 0;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
-int amdgpu_no_evict = 0;
-int amdgpu_direct_gma_size = 0;
uint amdgpu_pcie_gen_cap = 0;
uint amdgpu_pcie_lane_cap = 0;
uint amdgpu_cg_mask = 0xffffffff;
@@ -122,7 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */
+/* OverDrive (bit 14) disabled by default */
+uint amdgpu_pp_feature_mask = 0xffffbfff;
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
@@ -134,164 +127,372 @@ int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode = 0;
uint amdgpu_smu_memory_pool_size = 0;
+struct amdgpu_mgpu_info mgpu_info = {
+ .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
+};
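For context, mgpu_info is the shared instance table that the fan-boost helper in amdgpu_device.c earlier in this diff iterates. A hedged sketch of how a probing device might register itself; the helper name is hypothetical and the counter fields are assumptions based on how they are read there:

/* Hypothetical registration sketch: add the device to the shared table
 * under mgpu_info.mutex; num_gpu/num_dgpu are assumed counter fields and
 * a real implementation would also bounds-check the gpu_ins array.
 */
static void example_register_gpu_instance(struct amdgpu_device *adev)
{
	struct amdgpu_gpu_instance *gpu_ins;

	mutex_lock(&mgpu_info.mutex);
	gpu_ins = &mgpu_info.gpu_ins[mgpu_info.num_gpu++];
	gpu_ins->adev = adev;
	gpu_ins->mgpu_fan_enabled = 0;
	if (!(adev->flags & AMD_IS_APU))
		mgpu_info.num_dgpu++;
	mutex_unlock(&mgpu_info.mutex);
}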
+/**
+ * DOC: vramlimit (int)
+ * Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM).
+ */
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
+/**
+ * DOC: vis_vramlimit (int)
+ * Restrict the amount of CPU visible VRAM in MiB for testing. The default is 0 (Use full CPU visible VRAM).
+ */
MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
+/**
+ * DOC: gartsize (uint)
+ * Restrict the size of GART in MiB (32, 64, etc.) for testing. The default is -1 (the size depends on the asic).
+ */
MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
+/**
+ * DOC: gttsize (int)
+ * Restrict the size of the GTT domain in MiB for testing. The default is -1 (the GTT size is set to the VRAM size
+ * if 3GB < VRAM < 3/4 of system RAM, otherwise to 3/4 of system RAM).
+ */
MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
+/**
+ * DOC: moverate (int)
+ * Set maximum buffer migration rate in MB/s. The default is -1 (8 MB/s).
+ */
MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");
module_param_named(moverate, amdgpu_moverate, int, 0600);
+/**
+ * DOC: benchmark (int)
+ * Run benchmarks. The default is 0 (Skip benchmarks).
+ */
MODULE_PARM_DESC(benchmark, "Run benchmark");
module_param_named(benchmark, amdgpu_benchmarking, int, 0444);
+/**
+ * DOC: test (int)
+ * Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (skip the tests); set 1 to run the tests.
+ */
MODULE_PARM_DESC(test, "Run tests");
module_param_named(test, amdgpu_testing, int, 0444);
+/**
+ * DOC: audio (int)
+ * Enable HDMI/DP audio. Only affects non-DC display handling. The default is -1 (enabled); set 0 to disable it.
+ */
MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(audio, amdgpu_audio, int, 0444);
+/**
+ * DOC: disp_priority (int)
+ * Set display Priority (1 = normal, 2 = high). Only affects non-DC display handling. The default is 0 (auto).
+ */
MODULE_PARM_DESC(disp_priority, "Display Priority (0 = auto, 1 = normal, 2 = high)");
module_param_named(disp_priority, amdgpu_disp_priority, int, 0444);
+/**
+ * DOC: hw_i2c (int)
+ * Enable the hw i2c engine. Only affects non-DC display handling. The default is 0 (disabled).
+ */
MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)");
module_param_named(hw_i2c, amdgpu_hw_i2c, int, 0444);
+/**
+ * DOC: pcie_gen2 (int)
+ * Enable or disable PCIE Gen2/3 mode (0 = disable, 1 = enable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
+/**
+ * DOC: msi (int)
+ * Enable or disable Message Signaled Interrupts (MSI) (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
+/**
+ * DOC: lockup_timeout (int)
+ * Set the GPU scheduler timeout value in ms. A value of 0 is invalid and will be adjusted to 10000.
+ * Negative values mean an infinite timeout (MAX_JIFFY_OFFSET). The default is 10000.
+ */
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
+/**
+ * DOC: dpm (int)
+ * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto).
+ */
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(dpm, amdgpu_dpm, int, 0444);
+/**
+ * DOC: fw_load_type (int)
+ * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto).
+ */
MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");
module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
+/**
+ * DOC: aspm (int)
+ * Enable or disable ASPM (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(aspm, amdgpu_aspm, int, 0444);
+/**
+ * DOC: runpm (int)
+ * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
+ * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
+ */
MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
+/**
+ * DOC: ip_block_mask (uint)
+ * Override what IP blocks are enabled on the GPU. Each GPU is a collection of IP blocks (gfx, display, video, etc.).
+ * Use this parameter to disable specific blocks. Note that the IP blocks do not have a fixed index. Some asics may not have
+ * some IPs or may include multiple instances of an IP so the ordering varies from asic to asic. See the driver output in
+ * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
+ */
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
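As a hedged illustration of how a bit-mask parameter such as ip_block_mask is typically consumed, block i stays enabled only while bit i of the mask is set; the helper name below is made up for the example:

/* Illustrative only: an IP block index is enabled when its bit is set. */
static bool example_ip_block_enabled(uint32_t ip_block_mask, unsigned int i)
{
	return (ip_block_mask & (1u << i)) != 0;
}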
+/**
+ * DOC: bapm (int)
+ * Bidirectional Application Power Management (BAPM) is used to dynamically share the TDP between CPU and GPU.
+ * Set 0 to disable it. The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(bapm, amdgpu_bapm, int, 0444);
+/**
+ * DOC: deep_color (int)
+ * Set 1 to enable Deep Color support. Only affects non-DC display handling. The default is 0 (disabled).
+ */
MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))");
module_param_named(deep_color, amdgpu_deep_color, int, 0444);
+/**
+ * DOC: vm_size (int)
+ * Override the size of the GPU's per client virtual address space in GiB. The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
module_param_named(vm_size, amdgpu_vm_size, int, 0444);
+/**
+ * DOC: vm_fragment_size (int)
+ * Override VM fragment size in bits (4, 5, etc. 4 = 64K, 9 = 2M). The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
+/**
+ * DOC: vm_block_size (int)
+ * Override VM page table size in bits (default depending on vm_size and hw setup). The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
+/**
+ * DOC: vm_fault_stop (int)
+ * Stop on VM fault for debugging (0 = never, 1 = print first, 2 = always). The default is 0 (No stop).
+ */
MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
+/**
+ * DOC: vm_debug (int)
+ * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
+ */
MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
+/**
+ * DOC: vm_update_mode (int)
+ * Override the VM update mode, i.e. whether VM tables are updated using the CPU (0 = never, 1 = graphics only,
+ * 2 = compute only, 3 = both). The default is -1 (compute VM tables are updated by the CPU only on large-BAR (LB) systems, otherwise never).
+ */
MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");
module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
+/**
+ * DOC: vram_page_split (int)
+ * Override the number of pages after which VRAM allocations are split (-1 = disable). The default is 512.
+ */
MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
+/**
+ * DOC: exp_hw_support (int)
+ * Enable experimental hw support (1 = enable). The default is 0 (disabled).
+ */
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
+/**
+ * DOC: dc (int)
+ * Disable/Enable Display Core driver for debugging (1 = enable, 0 = disable). The default is -1 (automatic for each asic).
+ */
MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(dc, amdgpu_dc, int, 0444);
-MODULE_PARM_DESC(dc_log, "Display Core Log Level (0 = minimal (default), 1 = chatty");
-module_param_named(dc_log, amdgpu_dc_log, int, 0444);
-
+/**
+ * DOC: sched_jobs (int)
+ * Override the max number of jobs supported in the sw queue. The default is 32.
+ */
MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
+/**
+ * DOC: sched_hw_submission (int)
+ * Override the max number of HW submissions. The default is 2.
+ */
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
+/**
+ * DOC: ppfeaturemask (uint)
+ * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable power features.
+ */
MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
-MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))");
-module_param_named(no_evict, amdgpu_no_evict, int, 0444);
-
-MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)");
-module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444);
-
+/**
+ * DOC: pcie_gen_cap (uint)
+ * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+/**
+ * DOC: pcie_lane_cap (uint)
+ * Override PCIE lanes capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+ * The default is 0 (automatic for each asic).
+ */
MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+/**
+ * DOC: cg_mask (uint)
+ * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ */
MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+/**
+ * DOC: pg_mask (uint)
+ * Override Powergating features enabled on GPU (0 = disable power gating). See the AMD_PG_SUPPORT flags in
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ */
MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+/**
+ * DOC: sdma_phase_quantum (uint)
+ * Override SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change). The default is 32.
+ */
MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
+/**
+ * DOC: disable_cu (charp)
+ * Set to disable CUs (It's set like se.sh.cu,...). The default is NULL.
+ */
MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+/**
+ * DOC: virtual_display (charp)
+ * Set to enable the virtual display feature. This feature provides virtual display hardware on headless boards
+ * or in virtualized environments. The value has the form xxxx:xx:xx.x,x;xxxx:xx:xx.x,x, i.e. the pci address of
+ * the device plus the number of crtcs to expose. E.g., 0000:26:00.0,4 would enable 4 virtual crtcs on the pci
+ * device at 26:00.0. The default is NULL.
+ */
MODULE_PARM_DESC(virtual_display,
"Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
+/**
+ * DOC: ngg (int)
+ * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled).
+ */
MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
module_param_named(ngg, amdgpu_ngg, int, 0444);
+/**
+ * DOC: prim_buf_per_se (int)
+ * Override the size of the Primitive Buffer per Shader Engine in bytes. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
+/**
+ * DOC: pos_buf_per_se (int)
+ * Override the size of the Position Buffer per Shader Engine in bytes. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
+/**
+ * DOC: cntl_sb_buf_per_se (int)
+ * Override the size of the Control Sideband per Shader Engine in bytes. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
+/**
+ * DOC: param_buf_per_se (int)
+ * Override the size of the Off-Chip Parameter Cache per Shader Engine in bytes. The default is 0 (depending on gfx).
+ */
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
+/**
+ * DOC: job_hang_limit (int)
+ * Set how long a job is allowed to hang before it is dropped. The default is 0.
+ */
MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");
module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
+/**
+ * DOC: lbpw (int)
+ * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
+ */
MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(lbpw, amdgpu_lbpw, int, 0444);
MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
+/**
+ * DOC: gpu_recovery (int)
+ * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
+ */
MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+/**
+ * DOC: emu_mode (int)
+ * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
+ */
MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+/**
+ * DOC: si_support (int)
+ * Select the driver used for SI ASICs. This parameter only takes effect when CONFIG_DRM_AMDGPU_SI is set. When the radeon driver
+ * is also enabled, set 0 to handle SI ASICs with radeon or 1 to handle them with amdgpu. The default is to use radeon when it is
+ * available, and amdgpu otherwise.
+ */
#ifdef CONFIG_DRM_AMDGPU_SI
#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -305,6 +506,12 @@ MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)")
module_param_named(si_support, amdgpu_si_support, int, 0444);
#endif
+/**
+ * DOC: cik_support (int)
+ * Select the driver used for CIK ASICs. This parameter only takes effect when CONFIG_DRM_AMDGPU_CIK is set. When the radeon driver
+ * is also enabled, set 0 to handle CIK ASICs with radeon or 1 to handle them with amdgpu. The default is to use radeon when it is
+ * available, and amdgpu otherwise.
+ */
#ifdef CONFIG_DRM_AMDGPU_CIK
#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -318,11 +525,112 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
+/**
+ * DOC: smu_memory_pool_size (uint)
+ * Reserve GTT for SMU debug usage; set 0 to disable it. The reserved size is value * 256 MiB.
+ * E.g. 0x1 = 256 MiB, 0x2 = 512 MiB, 0x4 = 1 GiB, 0x8 = 2 GiB. The default is 0 (disabled).
+ */
MODULE_PARM_DESC(smu_memory_pool_size,
"reserve gtt for smu debug usage, 0 = disable,"
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
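
For reference, a tiny standalone sketch of the size mapping documented above (value * 256 MiB). The values shown are the ones listed in the description, not additional driver behaviour:

#include <stdint.h>
#include <stdio.h>

/* smu_memory_pool_size reserves value * 256 MiB of GTT (0 = disabled). */
int main(void)
{
        const uint64_t mib = 1024ULL * 1024ULL;
        unsigned int values[] = { 0x0, 0x1, 0x2, 0x4, 0x8 };

        for (unsigned int i = 0; i < 5; i++) {
                uint64_t bytes = (uint64_t)values[i] * 256 * mib;
                printf("smu_memory_pool_size=0x%x -> %llu MiB\n",
                       values[i], (unsigned long long)(bytes / mib));
        }
        return 0;
}
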
+#ifdef CONFIG_HSA_AMD
+/**
+ * DOC: sched_policy (int)
+ * Set scheduling policy. Default is HWS (hardware scheduling) with over-subscription.
+ * Setting 1 disables over-subscription. Setting 2 disables HWS and statically
+ * assigns queues to HQDs.
+ */
+int sched_policy = KFD_SCHED_POLICY_HWS;
+module_param(sched_policy, int, 0444);
+MODULE_PARM_DESC(sched_policy,
+ "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
+
+/**
+ * DOC: hws_max_conc_proc (int)
+ * Maximum number of processes that HWS can schedule concurrently. The maximum is the
+ * number of VMIDs assigned to the HWS, which is also the default.
+ */
+int hws_max_conc_proc = 8;
+module_param(hws_max_conc_proc, int, 0444);
+MODULE_PARM_DESC(hws_max_conc_proc,
+ "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
+
+/**
+ * DOC: cwsr_enable (int)
+ * CWSR (compute wave store and resume) allows the GPU to preempt shader execution in
+ * the middle of a compute wave. Default is 1 to enable this feature. Setting 0
+ * disables it.
+ */
+int cwsr_enable = 1;
+module_param(cwsr_enable, int, 0444);
+MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
+
+/**
+ * DOC: max_num_of_queues_per_device (int)
+ * Maximum number of queues per device. Valid setting is between 1 and 4096. Default
+ * is 4096.
+ */
+int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT;
+module_param(max_num_of_queues_per_device, int, 0444);
+MODULE_PARM_DESC(max_num_of_queues_per_device,
+ "Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
+
+/**
+ * DOC: send_sigterm (int)
+ * Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm
+ * but just print errors on dmesg. Setting 1 enables sending sigterm.
+ */
+int send_sigterm;
+module_param(send_sigterm, int, 0444);
+MODULE_PARM_DESC(send_sigterm,
+ "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
+
+/**
+ * DOC: debug_largebar (int)
+ * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
+ * system. This limits the VRAM size reported to ROCm applications to the visible
+ * size, usually 256MB.
+ * Default value is 0, disabled.
+ */
+int debug_largebar;
+module_param(debug_largebar, int, 0444);
+MODULE_PARM_DESC(debug_largebar,
+ "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
+
+/**
+ * DOC: ignore_crat (int)
+ * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
+ * table to get information about AMD APUs. This option can serve as a workaround on
+ * systems with a broken CRAT table.
+ */
+int ignore_crat;
+module_param(ignore_crat, int, 0444);
+MODULE_PARM_DESC(ignore_crat,
+ "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+
+/**
+ * DOC: noretry (int)
+ * This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry.
+ * Setting 1 disables retry.
+ * Retry is needed for recoverable page faults.
+ */
+int noretry;
+module_param(noretry, int, 0644);
+MODULE_PARM_DESC(noretry,
+ "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
+
+/**
+ * DOC: halt_if_hws_hang (int)
+ * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
+ * Setting 1 enables halt on hang.
+ */
+int halt_if_hws_hang;
+module_param(halt_if_hws_hang, int, 0644);
+MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+#endif
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -532,6 +840,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
{0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
+ {0x1002, 0x6FDF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10},
/* Polaris12 */
{0x1002, 0x6980, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
{0x1002, 0x6981, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
@@ -561,14 +870,15 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
/* Vega 20 */
- {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+ {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
+ {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0, 0, 0}
};
@@ -577,28 +887,6 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
static struct drm_driver kms_driver;
-static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev)
-{
- struct apertures_struct *ap;
- bool primary = false;
-
- ap = alloc_apertures(1);
- if (!ap)
- return -ENOMEM;
-
- ap->ranges[0].base = pci_resource_start(pdev, 0);
- ap->ranges[0].size = pci_resource_len(pdev, 0);
-
-#ifdef CONFIG_X86
- primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
-#endif
- drm_fb_helper_remove_conflicting_framebuffers(ap, "amdgpudrmfb", primary);
- kfree(ap);
-
- return 0;
-}
-
-
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -617,30 +905,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
- /*
- * Initialize amdkfd before starting radeon. If it was not loaded yet,
- * defer radeon probing
- */
- ret = amdgpu_amdkfd_init();
- if (ret == -EPROBE_DEFER)
- return ret;
-
/* Get rid of things like offb */
- ret = amdgpu_kick_out_firmware_fb(pdev);
+ ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
if (ret)
return ret;
- /* warn the user if they mix atomic and non-atomic capable GPUs */
- if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
- DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
- /* support atomic early so the atomic debugfs stuff gets created */
- if (supports_atomic)
- kms_driver.driver_features |= DRIVER_ATOMIC;
-
dev = drm_dev_alloc(&kms_driver, &pdev->dev);
if (IS_ERR(dev))
return PTR_ERR(dev);
+ if (!supports_atomic)
+ dev->driver_features &= ~DRIVER_ATOMIC;
+
ret = pci_enable_device(pdev);
if (ret)
goto err_free;
@@ -664,7 +940,7 @@ retry_init:
err_pci:
pci_disable_device(pdev);
err_free:
- drm_dev_unref(dev);
+ drm_dev_put(dev);
return ret;
}
@@ -673,8 +949,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
- drm_dev_unregister(dev);
- drm_dev_unref(dev);
+ DRM_ERROR("Device removal is currently not supported outside of fbcon\n");
+ drm_dev_unplug(dev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
@@ -855,9 +1131,21 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
.runtime_idle = amdgpu_pmops_runtime_idle,
};
+static int amdgpu_flush(struct file *f, fl_owner_t id)
+{
+ struct drm_file *file_priv = f->private_data;
+ struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
+
+ amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr);
+
+ return 0;
+}
+
+
static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
+ .flush = amdgpu_flush,
.release = drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
.mmap = amdgpu_mmap,
@@ -880,7 +1168,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
static struct drm_driver kms_driver = {
.driver_features =
- DRIVER_USE_AGP |
+ DRIVER_USE_AGP | DRIVER_ATOMIC |
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
.load = amdgpu_driver_load_kms,
@@ -957,6 +1245,10 @@ static int __init amdgpu_init(void)
pdriver = &amdgpu_kms_pci_driver;
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
+
+ /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */
+ amdgpu_amdkfd_init();
+
/* let modprobe override vga console setting */
return pci_register_driver(pdriver);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index 94138abe093b..ec78e2b2015c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -28,6 +28,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
#include "atom.h"
#include "atombios_encoders.h"
@@ -46,7 +47,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
if (amdgpu_encoder->devices & amdgpu_connector->devices) {
- drm_mode_connector_attach_encoder(connector, encoder);
+ drm_connector_attach_encoder(connector, encoder);
if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
amdgpu_atombios_encoder_init_backlight(amdgpu_encoder, connector);
adev->mode_info.bl_encoder = amdgpu_encoder;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index bc5fd8ebab5d..5cbde74b97dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -33,6 +33,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "cikd.h"
+#include "amdgpu_gem.h"
#include <drm/drm_fb_helper.h>
@@ -146,7 +147,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
- true, NULL, &gobj);
+ ttm_bo_type_kernel, NULL, &gobj);
if (ret) {
pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
return -ENOMEM;
@@ -168,11 +169,19 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
}
- ret = amdgpu_bo_pin(abo, domain, NULL);
+ ret = amdgpu_bo_pin(abo, domain);
if (ret) {
amdgpu_bo_unreserve(abo);
goto out_unref;
}
+
+ ret = amdgpu_ttm_alloc_gart(&abo->tbo);
+ if (ret) {
+ amdgpu_bo_unreserve(abo);
+ dev_err(adev->dev, "%p bind failed\n", abo);
+ goto out_unref;
+ }
+
ret = amdgpu_bo_kmap(abo, NULL);
amdgpu_bo_unreserve(abo);
if (ret) {
@@ -365,8 +374,8 @@ void amdgpu_fbdev_fini(struct amdgpu_device *adev)
void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state)
{
if (adev->mode_info.rfbdev)
- drm_fb_helper_set_suspend(&adev->mode_info.rfbdev->helper,
- state);
+ drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper,
+ state);
}
int amdgpu_fbdev_total_size(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index e74d620d9699..5448cf27654e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -216,8 +216,10 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
* Checks the current fence value and calculates the last
* signalled fence value. Wakes the fence queue if the
* sequence number has increased.
+ *
+ * Returns true if fence was processed
*/
-void amdgpu_fence_process(struct amdgpu_ring *ring)
+bool amdgpu_fence_process(struct amdgpu_ring *ring)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
uint32_t seq, last_seq;
@@ -229,11 +231,12 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (seq != ring->fence_drv.sync_seq)
+ if (del_timer(&ring->fence_drv.fallback_timer) &&
+ seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
if (unlikely(seq == last_seq))
- return;
+ return false;
last_seq &= drv->num_fences_mask;
seq &= drv->num_fences_mask;
@@ -260,6 +263,8 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
dma_fence_put(fence);
} while (last_seq != seq);
+
+ return true;
}
/**
@@ -274,7 +279,8 @@ static void amdgpu_fence_fallback(struct timer_list *t)
struct amdgpu_ring *ring = from_timer(ring, t,
fence_drv.fallback_timer);
- amdgpu_fence_process(ring);
+ if (amdgpu_fence_process(ring))
+ DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
}
/**
@@ -646,7 +652,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
- .wait = dma_fence_default_wait,
.release = amdgpu_fence_release,
};
@@ -702,7 +707,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
struct amdgpu_device *adev = dev->dev_private;
seq_printf(m, "gpu recover\n");
- amdgpu_device_gpu_recover(adev, NULL, true);
+ amdgpu_device_gpu_recover(adev, NULL);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index dd11b7313ca0..11fea28f8ad3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -112,7 +112,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
{
int r;
- if (adev->gart.robj == NULL) {
+ if (adev->gart.bo == NULL) {
struct amdgpu_bo_param bp;
memset(&bp, 0, sizeof(bp));
@@ -123,7 +123,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
- r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
+ r = amdgpu_bo_create(adev, &bp, &adev->gart.bo);
if (r) {
return r;
}
@@ -143,23 +143,20 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
*/
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{
- uint64_t gpu_addr;
int r;
- r = amdgpu_bo_reserve(adev->gart.robj, false);
+ r = amdgpu_bo_reserve(adev->gart.bo, false);
if (unlikely(r != 0))
return r;
- r = amdgpu_bo_pin(adev->gart.robj,
- AMDGPU_GEM_DOMAIN_VRAM, &gpu_addr);
+ r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
- amdgpu_bo_unreserve(adev->gart.robj);
+ amdgpu_bo_unreserve(adev->gart.bo);
return r;
}
- r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr);
+ r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr);
if (r)
- amdgpu_bo_unpin(adev->gart.robj);
- amdgpu_bo_unreserve(adev->gart.robj);
- adev->gart.table_addr = gpu_addr;
+ amdgpu_bo_unpin(adev->gart.bo);
+ amdgpu_bo_unreserve(adev->gart.bo);
return r;
}
@@ -175,14 +172,14 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
{
int r;
- if (adev->gart.robj == NULL) {
+ if (adev->gart.bo == NULL) {
return;
}
- r = amdgpu_bo_reserve(adev->gart.robj, true);
+ r = amdgpu_bo_reserve(adev->gart.bo, true);
if (likely(r == 0)) {
- amdgpu_bo_kunmap(adev->gart.robj);
- amdgpu_bo_unpin(adev->gart.robj);
- amdgpu_bo_unreserve(adev->gart.robj);
+ amdgpu_bo_kunmap(adev->gart.bo);
+ amdgpu_bo_unpin(adev->gart.bo);
+ amdgpu_bo_unreserve(adev->gart.bo);
adev->gart.ptr = NULL;
}
}
@@ -198,10 +195,10 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev)
*/
void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
{
- if (adev->gart.robj == NULL) {
+ if (adev->gart.bo == NULL) {
return;
}
- amdgpu_bo_unref(&adev->gart.robj);
+ amdgpu_bo_unref(&adev->gart.bo);
}
/*
@@ -234,7 +231,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++) {
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
adev->gart.pages[p] = NULL;
@@ -243,7 +240,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
if (!adev->gart.ptr)
continue;
- for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
@@ -282,7 +279,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < pages; i++) {
page_base = dma_addr[i];
- for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
+ for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
}
@@ -319,7 +316,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
t = offset / AMDGPU_GPU_PAGE_SIZE;
- p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++)
adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
#endif
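
The AMDGPU_GPU_PAGES_IN_CPU_PAGE macro introduced in amdgpu_gart.h (below) captures the GPU-pages-per-CPU-page ratio used in the index arithmetic above. A minimal standalone sketch of that arithmetic, assuming the 4 KiB GPU page from the header and, purely for illustration, a 64 KiB CPU page:

#include <stdint.h>
#include <stdio.h>

#define GPU_PAGE_SIZE           4096ULL   /* AMDGPU_GPU_PAGE_SIZE (1 << 12) */
#define CPU_PAGE_SIZE           65536ULL  /* example PAGE_SIZE of 64 KiB */
#define GPU_PAGES_IN_CPU_PAGE   (CPU_PAGE_SIZE / GPU_PAGE_SIZE)

int main(void)
{
        uint64_t offset = 3 * CPU_PAGE_SIZE + 2 * GPU_PAGE_SIZE;
        uint64_t t = offset / GPU_PAGE_SIZE;            /* GPU page index */
        uint64_t p = t / GPU_PAGES_IN_CPU_PAGE;         /* CPU page index */

        printf("offset %llu -> gpu page %llu, cpu page %llu\n",
               (unsigned long long)offset,
               (unsigned long long)t, (unsigned long long)p);
        return 0;
}
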
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index 456295c00291..9ff62887e4e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -37,9 +37,10 @@ struct amdgpu_bo;
#define AMDGPU_GPU_PAGE_SHIFT 12
#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)
+#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
+
struct amdgpu_gart {
- u64 table_addr;
- struct amdgpu_bo *robj;
+ struct amdgpu_bo *bo;
void *ptr;
unsigned num_gpu_pages;
unsigned num_cpu_pages;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index e73728d90388..ecbcefe49a98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -24,13 +24,6 @@
#ifndef __AMDGPU_GDS_H__
#define __AMDGPU_GDS_H__
-/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned,
- * we should report GDS/GWS/OA size as PAGE_SIZE aligned
- * */
-#define AMDGPU_GDS_SHIFT 2
-#define AMDGPU_GWS_SHIFT PAGE_SHIFT
-#define AMDGPU_OA_SHIFT PAGE_SHIFT
-
struct amdgpu_ring;
struct amdgpu_bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 5fb156a01774..7b3d1ebda9df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
- if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
- size = size << AMDGPU_GDS_SHIFT;
- else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
- size = size << AMDGPU_GWS_SHIFT;
- else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
- size = size << AMDGPU_OA_SHIFT;
- else
- return -EINVAL;
+ /* GDS allocations must be DW aligned */
+ if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
+ size = ALIGN(size, 4);
}
- size = roundup(size, PAGE_SIZE);
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
r = amdgpu_bo_reserve(vm->root.base.bo, false);
@@ -265,7 +259,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
(u32)(0xffffffff & args->in.domains),
- flags, false, resv, &gobj);
+ flags, ttm_bo_type_device, resv, &gobj);
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
if (!r) {
struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
@@ -317,7 +311,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* create a gem object to contain this object in */
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
- 0, 0, NULL, &gobj);
+ 0, ttm_bo_type_device, NULL, &gobj);
if (r)
return r;
@@ -344,7 +338,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (r)
goto free_pages;
- amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (r)
@@ -510,7 +504,6 @@ out:
* @adev: amdgpu_device pointer
* @vm: vm to update
* @bo_va: bo_va to update
- * @list: validation list
* @operation: map, unmap or clear
*
* Update the bo_va directly after setting its address. Errors are not
@@ -519,7 +512,6 @@ out:
static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo_va *bo_va,
- struct list_head *list,
uint32_t operation)
{
int r;
@@ -574,16 +566,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
- if (args->va_address >= AMDGPU_VA_HOLE_START &&
- args->va_address < AMDGPU_VA_HOLE_END) {
+ if (args->va_address >= AMDGPU_GMC_HOLE_START &&
+ args->va_address < AMDGPU_GMC_HOLE_END) {
dev_dbg(&dev->pdev->dev,
"va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
- args->va_address, AMDGPU_VA_HOLE_START,
- AMDGPU_VA_HOLE_END);
+ args->va_address, AMDGPU_GMC_HOLE_START,
+ AMDGPU_GMC_HOLE_END);
return -EINVAL;
}
- args->va_address &= AMDGPU_VA_HOLE_MASK;
+ args->va_address &= AMDGPU_GMC_HOLE_MASK;
if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n",
@@ -612,7 +604,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
tv.bo = &abo->tbo;
- tv.shared = false;
+ tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
list_add(&tv.head, &list);
} else {
gobj = NULL;
@@ -673,7 +665,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
break;
}
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
- amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list,
+ amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
args->operation);
error_backoff:
@@ -768,7 +760,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
amdgpu_display_supported_domains(adev));
r = amdgpu_gem_object_create(adev, args->size, 0, domain,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
- false, NULL, &gobj);
+ ttm_bo_type_device, NULL, &gobj);
if (r)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
new file mode 100644
index 000000000000..d63daba9b17c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_GEM_H__
+#define __AMDGPU_GEM_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+
+/*
+ * GEM.
+ */
+
+#define AMDGPU_GEM_DOMAIN_MAX 0x3
+#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
+
+void amdgpu_gem_object_free(struct drm_gem_object *obj);
+int amdgpu_gem_object_open(struct drm_gem_object *obj,
+ struct drm_file *file_priv);
+void amdgpu_gem_object_close(struct drm_gem_object *obj,
+ struct drm_file *file_priv);
+unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
+struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sg);
+struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *gobj,
+ int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
+void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
+void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+
+/*
+ * GEM objects.
+ */
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
+int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+ int alignment, u32 initial_domain,
+ u64 flags, enum ttm_bo_type type,
+ struct reservation_object *resv,
+ struct drm_gem_object **obj);
+
+int amdgpu_mode_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+int amdgpu_mode_dumb_mmap(struct drm_file *filp,
+ struct drm_device *dev,
+ uint32_t handle, uint64_t *offset_p);
+
+int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 239bf2a4b3c6..790fd5408ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -26,9 +26,44 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
+/* delay 0.1 second to enable gfx off feature */
+#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
+
/*
- * GPU scratch registers helpers function.
+ * GPU GFX IP block helper functions.
*/
+
+int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue)
+{
+ int bit = 0;
+
+ bit += mec * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+ bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+ bit += queue;
+
+ return bit;
+}
+
+void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue)
+{
+ *queue = bit % adev->gfx.mec.num_queue_per_pipe;
+ *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
+ % adev->gfx.mec.num_pipe_per_mec;
+ *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
+ / adev->gfx.mec.num_pipe_per_mec;
+
+}
+
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
+ int mec, int pipe, int queue)
+{
+ return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
+ adev->gfx.mec.queue_bitmap);
+}
+
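The three helpers above linearize (mec, pipe, queue) into a bit index and back. A standalone sketch of the same arithmetic, with example topology values (4 pipes per MEC, 8 queues per pipe) standing in for adev->gfx.mec:

#include <stdio.h>

/* Illustrative topology; the real values come from adev->gfx.mec. */
#define NUM_PIPE_PER_MEC        4
#define NUM_QUEUE_PER_PIPE      8

static int queue_to_bit(int mec, int pipe, int queue)
{
        return mec * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE +
               pipe * NUM_QUEUE_PER_PIPE + queue;
}

static void bit_to_queue(int bit, int *mec, int *pipe, int *queue)
{
        *queue = bit % NUM_QUEUE_PER_PIPE;
        *pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
        *mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}

int main(void)
{
        int mec, pipe, queue;
        int bit = queue_to_bit(1, 2, 5);        /* MEC 1, pipe 2, queue 5 -> bit 53 */

        bit_to_queue(bit, &mec, &pipe, &queue);
        printf("bit %d -> mec %d pipe %d queue %d\n", bit, mec, pipe, queue);
        return 0;
}
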
/**
* amdgpu_gfx_scratch_get - Allocate a scratch register
*
@@ -340,3 +375,40 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
+
+/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true to enable the gfx off feature, false to disable it
+ *
+ * 1. The gfx off feature will be enabled by the gfx IP block after gfx CG/PG is enabled.
+ * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
+ * 3. Other clients can cancel their request to disable the gfx off feature.
+ * 4. Other clients should not request enabling the gfx off feature before cancelling their disable request.
+ */
+
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
+{
+ if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
+ return;
+
+ if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
+ return;
+
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ if (!enable)
+ adev->gfx.gfx_off_req_count++;
+ else if (adev->gfx.gfx_off_req_count > 0)
+ adev->gfx.gfx_off_req_count--;
+
+ if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
+ } else if (!enable && adev->gfx.gfx_off_state) {
+ if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
+ adev->gfx.gfx_off_state = false;
+ }
+
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+}
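
The function above reference-counts disable requests: every disable bumps gfx_off_req_count, every enable decrements it, and gfxoff is only re-armed (via delayed work) once the count reaches zero. A toy userspace model of that bookkeeping, with the mutex, the delayed work and the SMU call left out and the initial state chosen for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the gfx_off_req_count bookkeeping; not driver code. */
struct gfx_off_state {
        bool enabled;           /* gfxoff currently allowed */
        unsigned int req_count; /* outstanding disable requests */
};

static void gfx_off_ctrl(struct gfx_off_state *s, bool enable)
{
        if (!enable)
                s->req_count++;
        else if (s->req_count > 0)
                s->req_count--;

        if (enable && !s->enabled && s->req_count == 0)
                s->enabled = true;      /* real driver schedules delayed work */
        else if (!enable && s->enabled)
                s->enabled = false;     /* real driver asks the SMU to leave gfxoff */
}

int main(void)
{
        struct gfx_off_state s = { .enabled = true, .req_count = 0 };

        gfx_off_ctrl(&s, false);        /* client A disables gfxoff */
        gfx_off_ctrl(&s, false);        /* client B disables gfxoff */
        gfx_off_ctrl(&s, true);         /* client A cancels its request */
        printf("enabled=%d req_count=%u\n", s.enabled, s.req_count); /* still off */
        gfx_off_ctrl(&s, true);         /* client B cancels -> gfxoff re-armed */
        printf("enabled=%d req_count=%u\n", s.enabled, s.req_count);
        return 0;
}
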
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 1f279050d334..b61b5c11aead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -24,28 +24,297 @@
#ifndef __AMDGPU_GFX_H__
#define __AMDGPU_GFX_H__
-int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
-void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
+/*
+ * GFX stuff
+ */
+#include "clearstate_defs.h"
+#include "amdgpu_ring.h"
-void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
- unsigned max_sh);
+/* GFX current status */
+#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
+#define AMDGPU_GFX_SAFE_MODE 0x00000001L
+#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L
+#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
+#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
-void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
-int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+struct amdgpu_rlc_funcs {
+ void (*enter_safe_mode)(struct amdgpu_device *adev);
+ void (*exit_safe_mode)(struct amdgpu_device *adev);
+};
-void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
- struct amdgpu_irq_src *irq);
+struct amdgpu_rlc {
+ /* for power gating */
+ struct amdgpu_bo *save_restore_obj;
+ uint64_t save_restore_gpu_addr;
+ volatile uint32_t *sr_ptr;
+ const u32 *reg_list;
+ u32 reg_list_size;
+ /* for clear state */
+ struct amdgpu_bo *clear_state_obj;
+ uint64_t clear_state_gpu_addr;
+ volatile uint32_t *cs_ptr;
+ const struct cs_section_def *cs_data;
+ u32 clear_state_size;
+ /* for cp tables */
+ struct amdgpu_bo *cp_table_obj;
+ uint64_t cp_table_gpu_addr;
+ volatile uint32_t *cp_table_ptr;
+ u32 cp_table_size;
-void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
-int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
- unsigned hpd_size);
+ /* safe mode for updating CG/PG state */
+ bool in_safe_mode;
+ const struct amdgpu_rlc_funcs *funcs;
-int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
- unsigned mqd_size);
-void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
+ /* for firmware data */
+ u32 save_and_restore_offset;
+ u32 clear_state_descriptor_offset;
+ u32 avail_scratch_ram_locations;
+ u32 reg_restore_list_size;
+ u32 reg_list_format_start;
+ u32 reg_list_format_separate_start;
+ u32 starting_offsets_start;
+ u32 reg_list_format_size_bytes;
+ u32 reg_list_size_bytes;
+ u32 reg_list_format_direct_reg_list_length;
+ u32 save_restore_list_cntl_size_bytes;
+ u32 save_restore_list_gpm_size_bytes;
+ u32 save_restore_list_srm_size_bytes;
+
+ u32 *register_list_format;
+ u32 *register_restore;
+ u8 *save_restore_list_cntl;
+ u8 *save_restore_list_gpm;
+ u8 *save_restore_list_srm;
+
+ bool is_rlc_v2_1;
+};
+
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
+struct amdgpu_mec {
+ struct amdgpu_bo *hpd_eop_obj;
+ u64 hpd_eop_gpu_addr;
+ struct amdgpu_bo *mec_fw_obj;
+ u64 mec_fw_gpu_addr;
+ u32 num_mec;
+ u32 num_pipe_per_mec;
+ u32 num_queue_per_pipe;
+ void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+ /* These are the resources for which amdgpu takes ownership */
+ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+};
+
+struct amdgpu_kiq {
+ u64 eop_gpu_addr;
+ struct amdgpu_bo *eop_obj;
+ spinlock_t ring_lock;
+ struct amdgpu_ring ring;
+ struct amdgpu_irq_src irq;
+};
+
+/*
+ * GPU scratch registers structures, functions & helpers
+ */
+struct amdgpu_scratch {
+ unsigned num_reg;
+ uint32_t reg_base;
+ uint32_t free_mask;
+};
+
+/*
+ * GFX configurations
+ */
+#define AMDGPU_GFX_MAX_SE 4
+#define AMDGPU_GFX_MAX_SH_PER_SE 2
+
+struct amdgpu_rb_config {
+ uint32_t rb_backend_disable;
+ uint32_t user_rb_backend_disable;
+ uint32_t raster_config;
+ uint32_t raster_config_1;
+};
+
+struct gb_addr_config {
+ uint16_t pipe_interleave_size;
+ uint8_t num_pipes;
+ uint8_t max_compress_frags;
+ uint8_t num_banks;
+ uint8_t num_se;
+ uint8_t num_rb_per_se;
+};
+
+struct amdgpu_gfx_config {
+ unsigned max_shader_engines;
+ unsigned max_tile_pipes;
+ unsigned max_cu_per_sh;
+ unsigned max_sh_per_se;
+ unsigned max_backends_per_se;
+ unsigned max_texture_channel_caches;
+ unsigned max_gprs;
+ unsigned max_gs_threads;
+ unsigned max_hw_contexts;
+ unsigned sc_prim_fifo_size_frontend;
+ unsigned sc_prim_fifo_size_backend;
+ unsigned sc_hiz_tile_fifo_size;
+ unsigned sc_earlyz_tile_fifo_size;
+
+ unsigned num_tile_pipes;
+ unsigned backend_enable_mask;
+ unsigned mem_max_burst_length_bytes;
+ unsigned mem_row_size_in_kb;
+ unsigned shader_engine_tile_size;
+ unsigned num_gpus;
+ unsigned multi_gpu_tile_size;
+ unsigned mc_arb_ramcfg;
+ unsigned gb_addr_config;
+ unsigned num_rbs;
+ unsigned gs_vgt_table_depth;
+ unsigned gs_prim_buffer_depth;
+
+ uint32_t tile_mode_array[32];
+ uint32_t macrotile_mode_array[16];
+
+ struct gb_addr_config gb_addr_config_fields;
+ struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
+
+ /* gfx configure feature */
+ uint32_t double_offchip_lds_buf;
+ /* cached value of DB_DEBUG2 */
+ uint32_t db_debug2;
+};
+
+struct amdgpu_cu_info {
+ uint32_t simd_per_cu;
+ uint32_t max_waves_per_simd;
+ uint32_t wave_front_size;
+ uint32_t max_scratch_slots_per_cu;
+ uint32_t lds_size;
+
+ /* total active CU number */
+ uint32_t number;
+ uint32_t ao_cu_mask;
+ uint32_t ao_cu_bitmap[4][4];
+ uint32_t bitmap[4][4];
+};
+
+struct amdgpu_gfx_funcs {
+ /* get the gpu clock counter */
+ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+ void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 instance);
+ void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t *dst, int *no_fields);
+ void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t thread, uint32_t start,
+ uint32_t size, uint32_t *dst);
+ void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd,
+ uint32_t wave, uint32_t start, uint32_t size,
+ uint32_t *dst);
+ void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe,
+ u32 queue);
+};
+
+struct amdgpu_ngg_buf {
+ struct amdgpu_bo *bo;
+ uint64_t gpu_addr;
+ uint32_t size;
+ uint32_t bo_size;
+};
+
+enum {
+ NGG_PRIM = 0,
+ NGG_POS,
+ NGG_CNTL,
+ NGG_PARAM,
+ NGG_BUF_MAX
+};
+
+struct amdgpu_ngg {
+ struct amdgpu_ngg_buf buf[NGG_BUF_MAX];
+ uint32_t gds_reserve_addr;
+ uint32_t gds_reserve_size;
+ bool init;
+};
+
+struct sq_work {
+ struct work_struct work;
+ unsigned ih_data;
+};
+
+struct amdgpu_gfx {
+ struct mutex gpu_clock_mutex;
+ struct amdgpu_gfx_config config;
+ struct amdgpu_rlc rlc;
+ struct amdgpu_mec mec;
+ struct amdgpu_kiq kiq;
+ struct amdgpu_scratch scratch;
+ const struct firmware *me_fw; /* ME firmware */
+ uint32_t me_fw_version;
+ const struct firmware *pfp_fw; /* PFP firmware */
+ uint32_t pfp_fw_version;
+ const struct firmware *ce_fw; /* CE firmware */
+ uint32_t ce_fw_version;
+ const struct firmware *rlc_fw; /* RLC firmware */
+ uint32_t rlc_fw_version;
+ const struct firmware *mec_fw; /* MEC firmware */
+ uint32_t mec_fw_version;
+ const struct firmware *mec2_fw; /* MEC2 firmware */
+ uint32_t mec2_fw_version;
+ uint32_t me_feature_version;
+ uint32_t ce_feature_version;
+ uint32_t pfp_feature_version;
+ uint32_t rlc_feature_version;
+ uint32_t rlc_srlc_fw_version;
+ uint32_t rlc_srlc_feature_version;
+ uint32_t rlc_srlg_fw_version;
+ uint32_t rlc_srlg_feature_version;
+ uint32_t rlc_srls_fw_version;
+ uint32_t rlc_srls_feature_version;
+ uint32_t mec_feature_version;
+ uint32_t mec2_feature_version;
+ bool mec_fw_write_wait;
+ bool me_fw_write_wait;
+ struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
+ unsigned num_gfx_rings;
+ struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+ unsigned num_compute_rings;
+ struct amdgpu_irq_src eop_irq;
+ struct amdgpu_irq_src priv_reg_irq;
+ struct amdgpu_irq_src priv_inst_irq;
+ struct amdgpu_irq_src cp_ecc_error_irq;
+ struct amdgpu_irq_src sq_irq;
+ struct sq_work sq_work;
+
+ /* gfx status */
+ uint32_t gfx_current_status;
+ /* ce ram size*/
+ unsigned ce_ram_size;
+ struct amdgpu_cu_info cu_info;
+ const struct amdgpu_gfx_funcs *funcs;
+
+ /* reset mask */
+ uint32_t grbm_soft_reset;
+ uint32_t srbm_soft_reset;
+
+ /* NGG */
+ struct amdgpu_ngg ngg;
+
+ /* gfx off */
+ bool gfx_off_state; /* true: enabled, false: disabled */
+ struct mutex gfx_off_mutex;
+ uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
+ struct delayed_work gfx_off_delay_work;
+
+ /* pipe reservation */
+ struct mutex pipe_reserve_mutex;
+ DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+};
+
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
/**
* amdgpu_gfx_create_bitmask - create a bitmask
@@ -60,34 +329,34 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width)
return (u32)((1ULL << bit_width) - 1);
}
-static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev,
- int mec, int pipe, int queue)
-{
- int bit = 0;
+int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
+void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
- bit += mec * adev->gfx.mec.num_pipe_per_mec
- * adev->gfx.mec.num_queue_per_pipe;
- bit += pipe * adev->gfx.mec.num_queue_per_pipe;
- bit += queue;
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se,
+ unsigned max_sh);
- return bit;
-}
+int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq);
-static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
- int *mec, int *pipe, int *queue)
-{
- *queue = bit % adev->gfx.mec.num_queue_per_pipe;
- *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
- % adev->gfx.mec.num_pipe_per_mec;
- *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
- / adev->gfx.mec.num_pipe_per_mec;
+void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring,
+ struct amdgpu_irq_src *irq);
-}
-static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
- int mec, int pipe, int queue)
-{
- return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
- adev->gfx.mec.queue_bitmap);
-}
+void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
+int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
+ unsigned hpd_size);
+
+int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
+ unsigned mqd_size);
+void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
+
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
+int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
+ int pipe, int queue);
+void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
+ int *mec, int *pipe, int *queue);
+bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
+ int pipe, int queue);
+void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
new file mode 100644
index 000000000000..d73367cab4f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+
+/**
+ * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
+ *
+ * @bo: the BO to get the PDE for
+ * @level: the level in the PD hierarchy
+ * @addr: resulting addr
+ * @flags: resulting flags
+ *
+ * Get the address and flags to be used for a PDE (Page Directory Entry).
+ */
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+ uint64_t *addr, uint64_t *flags)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_dma_tt *ttm;
+
+ switch (bo->tbo.mem.mem_type) {
+ case TTM_PL_TT:
+ ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
+ *addr = ttm->dma_address[0];
+ break;
+ case TTM_PL_VRAM:
+ *addr = amdgpu_bo_gpu_offset(bo);
+ break;
+ default:
+ *addr = 0;
+ break;
+ }
+ *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem);
+ amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
+}
+
+/**
+ * amdgpu_gmc_pd_addr - return the address of the root directory
+ *
+ * @bo: page directory BO
+ */
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ uint64_t pd_addr;
+
+ /* TODO: move that into ASIC specific code */
+ if (adev->asic_type >= CHIP_VEGA10) {
+ uint64_t flags = AMDGPU_PTE_VALID;
+
+ amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
+ pd_addr |= flags;
+ } else {
+ pd_addr = amdgpu_bo_gpu_offset(bo);
+ }
+ return pd_addr;
+}
+
+/**
+ * amdgpu_gmc_agp_addr - return the address in the AGP address space
+ *
+ * @bo: TTM BO which needs the address, must be in GTT domain
+ *
+ * Tries to figure out how to access the BO through the AGP aperture. Returns
+ * AMDGPU_BO_INVALID_OFFSET if that is not possible.
+ */
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct ttm_dma_tt *ttm;
+
+ if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached)
+ return AMDGPU_BO_INVALID_OFFSET;
+
+ ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm);
+ if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
+ return AMDGPU_BO_INVALID_OFFSET;
+
+ return adev->gmc.agp_start + ttm->dma_address[0];
+}
+
+/**
+ * amdgpu_gmc_vram_location - try to find VRAM location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ * @base: base address at which to put VRAM
+ *
+ * Function will try to place VRAM at base address provided
+ * as parameter.
+ */
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ u64 base)
+{
+ uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
+
+ mc->vram_start = base;
+ mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
+ if (limit && limit < mc->real_vram_size)
+ mc->real_vram_size = limit;
+
+ if (mc->xgmi.num_physical_nodes == 0) {
+ mc->fb_start = mc->vram_start;
+ mc->fb_end = mc->vram_end;
+ }
+ dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
+ mc->mc_vram_size >> 20, mc->vram_start,
+ mc->vram_end, mc->real_vram_size >> 20);
+}
+
+/**
+ * amdgpu_gmc_gart_location - try to find GART location
+ *
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * Function will try to place GART before or after VRAM.
+ *
+ * If the GART size is bigger than the space left, the GART size is adjusted.
+ * Thus this function never fails.
+ */
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ const uint64_t four_gb = 0x100000000ULL;
+ u64 size_af, size_bf;
+ /* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
+ u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
+
+ mc->gart_size += adev->pm.smu_prv_buffer_size;
+
+ /* VCE doesn't like it when BOs cross a 4GB segment, so align
+ * the GART base on a 4GB boundary as well.
+ */
+ size_bf = mc->fb_start;
+ size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
+
+ if (mc->gart_size > max(size_bf, size_af)) {
+ dev_warn(adev->dev, "limiting GART\n");
+ mc->gart_size = max(size_bf, size_af);
+ }
+
+ if ((size_bf >= mc->gart_size && size_bf < size_af) ||
+ (size_af < mc->gart_size))
+ mc->gart_start = 0;
+ else
+ mc->gart_start = max_mc_address - mc->gart_size + 1;
+
+ mc->gart_start &= ~(four_gb - 1);
+ mc->gart_end = mc->gart_start + mc->gart_size - 1;
+ dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
+ mc->gart_size >> 20, mc->gart_start, mc->gart_end);
+}
+
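To make the before/after-VRAM placement decision concrete, here is a small standalone sketch of the same sizing arithmetic, simplified (the smu_prv_buffer_size addition and the hole clamp on the max MC address are left out) and fed with made-up example values:

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
        const uint64_t four_gb = 0x100000000ULL;
        /* Example values only; the driver reads these from the hardware. */
        uint64_t fb_start = 8ULL << 30;                 /* VRAM at 8 GB */
        uint64_t fb_end = (16ULL << 30) - 1;            /* ... up to 16 GB */
        uint64_t max_mc_address = (1ULL << 44) - 1;     /* 44-bit MC space */
        uint64_t gart_size = 512ULL << 20;              /* 512 MiB GART */
        uint64_t size_bf, size_af, gart_start, gart_end;

        size_bf = fb_start;                                     /* room before VRAM */
        size_af = max_mc_address + 1 - ALIGN_UP(fb_end + 1, four_gb); /* room after */

        if (gart_size > (size_bf > size_af ? size_bf : size_af))
                gart_size = size_bf > size_af ? size_bf : size_af;

        if ((size_bf >= gart_size && size_bf < size_af) || size_af < gart_size)
                gart_start = 0;
        else
                gart_start = max_mc_address - gart_size + 1;

        gart_start &= ~(four_gb - 1);   /* keep the GART base 4 GB aligned */
        gart_end = gart_start + gart_size - 1;
        printf("GART: 0x%016llx - 0x%016llx\n",
               (unsigned long long)gart_start, (unsigned long long)gart_end);
        return 0;
}
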
+/**
+ * amdgpu_gmc_agp_location - try to find AGP location
+ * @adev: amdgpu device structure holding all necessary information
+ * @mc: memory controller structure holding memory information
+ *
+ * Function will try to find a place for the AGP BAR in the MC address
+ * space.
+ *
+ * AGP BAR will be assigned the largest available hole in the address space.
+ * Should be called after VRAM and GART locations are setup.
+ */
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
+{
+ const uint64_t sixteen_gb = 1ULL << 34;
+ const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
+ u64 size_af, size_bf;
+
+ if (mc->fb_start > mc->gart_start) {
+ size_bf = (mc->fb_start & sixteen_gb_mask) -
+ ALIGN(mc->gart_end + 1, sixteen_gb);
+ size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
+ } else {
+ size_bf = mc->fb_start & sixteen_gb_mask;
+ size_af = (mc->gart_start & sixteen_gb_mask) -
+ ALIGN(mc->fb_end + 1, sixteen_gb);
+ }
+
+ if (size_bf > size_af) {
+ mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
+ mc->agp_size = size_bf;
+ } else {
+ mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
+ mc->agp_size = size_af;
+ }
+
+ mc->agp_end = mc->agp_start + mc->agp_size - 1;
+ dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
+ mc->agp_size >> 20, mc->agp_start, mc->agp_end);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 893c2490b783..6fa7ef446e46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -30,6 +30,19 @@
#include "amdgpu_irq.h"
+/* VA hole for 48bit addresses on Vega10 */
+#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
+#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL
+
+/*
+ * Hardware is programmed as if the hole doesn't exist, using the start and end
+ * address values above.
+ *
+ * This mask is used to remove the upper 16 bits of the VA and so arrive at
+ * the linear address value.
+ */
+#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL
+
struct firmware;
/*
@@ -74,6 +87,20 @@ struct amdgpu_gmc_funcs {
u64 *dst, u64 *flags);
};
+struct amdgpu_xgmi {
+ /* from psp */
+ u64 device_id;
+ u64 hive_id;
+ /* fixed per family */
+ u64 node_segment_size;
+ /* physical node (0-3) */
+ unsigned physical_node_id;
+ /* number of nodes (0-4) */
+ unsigned num_physical_nodes;
+ /* gpu list in the same hive */
+ struct list_head head;
+};
+
struct amdgpu_gmc {
resource_size_t aper_size;
resource_size_t aper_base;
@@ -81,11 +108,22 @@ struct amdgpu_gmc {
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
+ u64 agp_size;
+ u64 agp_start;
+ u64 agp_end;
u64 gart_size;
u64 gart_start;
u64 gart_end;
u64 vram_start;
u64 vram_end;
+ /* FB region: on a single GPU this is the same as the local vram region; in an
+ * XGMI configuration it covers all GPUs in the same hive, and each GPU in the
+ * hive has the same view of this FB region.
+ * GPU0's vram starts at offset (0 * segment size),
+ * GPU1 starts at offset (1 * segment size), etc.
+ */
+ u64 fb_start;
+ u64 fb_end;
unsigned vram_width;
u64 real_vram_size;
int vram_mtrr;
@@ -105,8 +143,58 @@ struct amdgpu_gmc {
/* protects concurrent invalidation */
spinlock_t invalidate_lock;
bool translate_further;
+ struct kfd_vm_fault_info *vm_fault_info;
+ atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs;
+
+ struct amdgpu_xgmi xgmi;
};
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
+#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
+#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
+
+/**
+ * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
+ *
+ * @gmc: amdgpu_gmc structure
+ *
+ * Returns:
+ * True if full VRAM is visible through the BAR
+ */
+static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
+{
+ WARN_ON(gmc->real_vram_size < gmc->visible_vram_size);
+
+ return (gmc->real_vram_size == gmc->visible_vram_size);
+}
+
+/**
+ * amdgpu_gmc_sign_extend - sign extend the given gmc address
+ *
+ * @addr: address to extend
+ */
+static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
+{
+ if (addr >= AMDGPU_GMC_HOLE_START)
+ addr |= AMDGPU_GMC_HOLE_END;
+
+ return addr;
+}
+
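As a quick check of how the hole constants and amdgpu_gmc_sign_extend() above fit together, a standalone sketch: masking with AMDGPU_GMC_HOLE_MASK yields the linear value the hardware is programmed with, and sign extension restores the canonical address. The constants are copied from this header; the helper mirrors the inline above:

#include <stdint.h>
#include <stdio.h>

#define GMC_HOLE_START  0x0000800000000000ULL
#define GMC_HOLE_END    0xffff800000000000ULL
#define GMC_HOLE_MASK   0x0000ffffffffffffULL

static uint64_t sign_extend(uint64_t addr)
{
        if (addr >= GMC_HOLE_START)
                addr |= GMC_HOLE_END;
        return addr;
}

int main(void)
{
        uint64_t va = 0xffff900000001000ULL;    /* canonical upper-half address */
        uint64_t linear = va & GMC_HOLE_MASK;   /* value the hardware sees */

        printf("va      0x%016llx\n", (unsigned long long)va);
        printf("linear  0x%016llx\n", (unsigned long long)linear);
        printf("back    0x%016llx\n", (unsigned long long)sign_extend(linear));
        return 0;
}
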
+void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
+ uint64_t *addr, uint64_t *flags);
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
+uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo);
+void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
+ u64 base);
+void amdgpu_gmc_gart_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
+ struct amdgpu_gmc *mc);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7aaa263ad8c7..b8963b725dfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"
+#include "amdgpu_trace.h"
#define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000)
@@ -139,7 +140,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* ring tests don't use a job */
if (job) {
vm = job->vm;
- fence_ctx = job->fence_ctx;
+ fence_ctx = job->base.s_fence->scheduled.context;
} else {
vm = NULL;
fence_ctx = 0;
@@ -164,10 +165,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return r;
}
+ need_ctx_switch = ring->current_ctx != fence_ctx;
if (ring->funcs->emit_pipeline_sync && job &&
((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) ||
+ (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
amdgpu_vm_need_pipeline_sync(ring, job))) {
need_pipe_sync = true;
+
+ if (tmp)
+ trace_amdgpu_ib_pipe_sync(job, tmp);
+
dma_fence_put(tmp);
}
@@ -196,7 +203,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
skip_preamble = ring->current_ctx == fence_ctx;
- need_ctx_switch = ring->current_ctx != fence_ctx;
if (job && ring->funcs->emit_cntxcntl) {
if (need_ctx_switch)
status |= AMDGPU_HAVE_CTX_SWITCH;
@@ -348,12 +354,21 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
if (!ring || !ring->ready)
continue;
+ /* skip IB tests for KIQ in general for the below reasons:
+ * 1. We never submit IBs to the KIQ
+ * 2. KIQ doesn't use the EOP interrupts,
+ * we use some other CP interrupt.
+ */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
+ continue;
+
/* MM engine need more time */
if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
- ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
tmo = tmo_mm;
else
tmo = tmo_gfx;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index a1c78f90eadf..df9b173c3d0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -574,15 +574,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
/* skip over VMID 0, since it is the system VM */
for (j = 1; j < id_mgr->num_ids; ++j) {
amdgpu_vmid_reset(adev, i, j);
- amdgpu_sync_create(&id_mgr->ids[i].active);
+ amdgpu_sync_create(&id_mgr->ids[j].active);
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
-
- adev->vm_manager.fence_context =
- dma_fence_context_alloc(AMDGPU_MAX_RINGS);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- adev->vm_manager.seqno[i] = 0;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 06373d44b3da..8af67f649660 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -24,46 +24,21 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"
-#include "amdgpu_amdkfd.h"
-
-/**
- * amdgpu_ih_ring_alloc - allocate memory for the IH ring
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate a ring buffer for the interrupt controller.
- * Returns 0 for success, errors for failure.
- */
-static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev)
-{
- int r;
-
- /* Allocate ring buffer */
- if (adev->irq.ih.ring_obj == NULL) {
- r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
- &adev->irq.ih.ring_obj,
- &adev->irq.ih.gpu_addr,
- (void **)&adev->irq.ih.ring);
- if (r) {
- DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r);
- return r;
- }
- }
- return 0;
-}
/**
* amdgpu_ih_ring_init - initialize the IH state
*
* @adev: amdgpu_device pointer
+ * @ih: ih ring to initialize
+ * @ring_size: ring size to allocate
+ * @use_bus_addr: true when we can use dma_alloc_coherent
*
* Initializes the IH state and allocates a buffer
* for the IH ring buffer.
* Returns 0 for success, errors for failure.
*/
-int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
- bool use_bus_addr)
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ unsigned ring_size, bool use_bus_addr)
{
u32 rb_bufsz;
int r;
@@ -71,70 +46,76 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
/* Align ring size */
rb_bufsz = order_base_2(ring_size / 4);
ring_size = (1 << rb_bufsz) * 4;
- adev->irq.ih.ring_size = ring_size;
- adev->irq.ih.ptr_mask = adev->irq.ih.ring_size - 1;
- adev->irq.ih.rptr = 0;
- adev->irq.ih.use_bus_addr = use_bus_addr;
+ ih->ring_size = ring_size;
+ ih->ptr_mask = ih->ring_size - 1;
+ ih->rptr = 0;
+ ih->use_bus_addr = use_bus_addr;
- if (adev->irq.ih.use_bus_addr) {
- if (!adev->irq.ih.ring) {
- /* add 8 bytes for the rptr/wptr shadows and
- * add them to the end of the ring allocation.
- */
- adev->irq.ih.ring = pci_alloc_consistent(adev->pdev,
- adev->irq.ih.ring_size + 8,
- &adev->irq.ih.rb_dma_addr);
- if (adev->irq.ih.ring == NULL)
- return -ENOMEM;
- memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8);
- adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0;
- adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1;
- }
- return 0;
+ if (use_bus_addr) {
+ if (ih->ring)
+ return 0;
+
+ /* add 8 bytes for the rptr/wptr shadows and
+ * add them to the end of the ring allocation.
+ */
+ ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
+ &ih->rb_dma_addr, GFP_KERNEL);
+ if (ih->ring == NULL)
+ return -ENOMEM;
+
+ memset((void *)ih->ring, 0, ih->ring_size + 8);
+ ih->wptr_offs = (ih->ring_size / 4) + 0;
+ ih->rptr_offs = (ih->ring_size / 4) + 1;
} else {
- r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs);
+ r = amdgpu_device_wb_get(adev, &ih->wptr_offs);
+ if (r)
+ return r;
+
+ r = amdgpu_device_wb_get(adev, &ih->rptr_offs);
if (r) {
- dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r);
+ amdgpu_device_wb_free(adev, ih->wptr_offs);
return r;
}
- r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs);
+ r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT,
+ &ih->ring_obj, &ih->gpu_addr,
+ (void **)&ih->ring);
if (r) {
- amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
- dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r);
+ amdgpu_device_wb_free(adev, ih->rptr_offs);
+ amdgpu_device_wb_free(adev, ih->wptr_offs);
return r;
}
-
- return amdgpu_ih_ring_alloc(adev);
}
+ return 0;
}
/**
* amdgpu_ih_ring_fini - tear down the IH state
*
* @adev: amdgpu_device pointer
+ * @ih: ih ring to tear down
*
* Tears down the IH state and frees buffer
* used for the IH ring buffer.
*/
-void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
- if (adev->irq.ih.use_bus_addr) {
- if (adev->irq.ih.ring) {
- /* add 8 bytes for the rptr/wptr shadows and
- * add them to the end of the ring allocation.
- */
- pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8,
- (void *)adev->irq.ih.ring,
- adev->irq.ih.rb_dma_addr);
- adev->irq.ih.ring = NULL;
- }
+ if (ih->use_bus_addr) {
+ if (!ih->ring)
+ return;
+
+ /* add 8 bytes for the rptr/wptr shadows and
+ * add them to the end of the ring allocation.
+ */
+ dma_free_coherent(adev->dev, ih->ring_size + 8,
+ (void *)ih->ring, ih->rb_dma_addr);
+ ih->ring = NULL;
} else {
- amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj,
- &adev->irq.ih.gpu_addr,
- (void **)&adev->irq.ih.ring);
- amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs);
- amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs);
+ amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
+ (void **)&ih->ring);
+ amdgpu_device_wb_free(adev, ih->wptr_offs);
+ amdgpu_device_wb_free(adev, ih->rptr_offs);
}
}
@@ -142,133 +123,45 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev)
* amdgpu_ih_process - interrupt handler
*
* @adev: amdgpu_device pointer
+ * @ih: ih ring to process
*
 * Interrupt handler (VI), walk the IH ring.
* Returns irq process return code.
*/
-int amdgpu_ih_process(struct amdgpu_device *adev)
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ void (*callback)(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih))
{
- struct amdgpu_iv_entry entry;
u32 wptr;
- if (!adev->irq.ih.enabled || adev->shutdown)
+ if (!ih->enabled || adev->shutdown)
return IRQ_NONE;
wptr = amdgpu_ih_get_wptr(adev);
restart_ih:
/* is somebody else already processing irqs? */
- if (atomic_xchg(&adev->irq.ih.lock, 1))
+ if (atomic_xchg(&ih->lock, 1))
return IRQ_NONE;
- DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, adev->irq.ih.rptr, wptr);
+ DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
/* Order reading of wptr vs. reading of IH ring data */
rmb();
- while (adev->irq.ih.rptr != wptr) {
- u32 ring_index = adev->irq.ih.rptr >> 2;
-
- /* Prescreening of high-frequency interrupts */
- if (!amdgpu_ih_prescreen_iv(adev)) {
- adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
- continue;
- }
-
- /* Before dispatching irq to IP blocks, send it to amdkfd */
- amdgpu_amdkfd_interrupt(adev,
- (const void *) &adev->irq.ih.ring[ring_index]);
-
- entry.iv_entry = (const uint32_t *)
- &adev->irq.ih.ring[ring_index];
- amdgpu_ih_decode_iv(adev, &entry);
- adev->irq.ih.rptr &= adev->irq.ih.ptr_mask;
-
- amdgpu_irq_dispatch(adev, &entry);
+ while (ih->rptr != wptr) {
+ callback(adev, ih);
+ ih->rptr &= ih->ptr_mask;
}
+
amdgpu_ih_set_rptr(adev);
- atomic_set(&adev->irq.ih.lock, 0);
+ atomic_set(&ih->lock, 0);
/* make sure wptr hasn't changed while processing */
wptr = amdgpu_ih_get_wptr(adev);
- if (wptr != adev->irq.ih.rptr)
+ if (wptr != ih->rptr)
goto restart_ih;
return IRQ_HANDLED;
}
-/**
- * amdgpu_ih_add_fault - Add a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a retry page fault interrupt is
- * received. If this is a new page fault, it will be added to a hash
- * table. The return value indicates whether this is a new fault, or
- * a fault that was already known and is already being handled.
- *
- * If there are too many pending page faults, this will fail. Retry
- * interrupts should be ignored in this case until there is enough
- * free space.
- *
- * Returns 0 if the fault was added, 1 if the fault was already known,
- * -ENOSPC if there are too many pending faults.
- */
-int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
-{
- unsigned long flags;
- int r = -ENOSPC;
-
- if (WARN_ON_ONCE(!adev->irq.ih.faults))
- /* Should be allocated in <IP>_ih_sw_init on GPUs that
- * support retry faults and require retry filtering.
- */
- return r;
-
- spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
- /* Only let the hash table fill up to 50% for best performance */
- if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
- goto unlock_out;
-
- r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
- if (!r)
- adev->irq.ih.faults->count++;
-
- /* chash_table_copy_in should never fail unless we're losing count */
- WARN_ON_ONCE(r < 0);
-
-unlock_out:
- spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
- return r;
-}
-
-/**
- * amdgpu_ih_clear_fault - Remove a page fault record
- *
- * @adev: amdgpu device pointer
- * @key: 64-bit encoding of PASID and address
- *
- * This should be called when a page fault has been handled. Any
- * future interrupt with this key will be processed as a new
- * page fault.
- */
-void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
-{
- unsigned long flags;
- int r;
-
- if (!adev->irq.ih.faults)
- return;
-
- spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
-
- r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
- if (!WARN_ON_ONCE(r < 0)) {
- adev->irq.ih.faults->count--;
- WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
- }
-
- spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
-}
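
A hedged sketch of how an interrupt block is expected to use the reworked ring API, now that the IH ring is passed explicitly instead of being hard-coded to adev->irq.ih; the example_* names are illustrative, not code from this series:

    static int example_ih_sw_init(struct amdgpu_device *adev)
    {
            /* 64 KB ring backed by dma_alloc_coherent() */
            return amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
    }

    static void example_ih_sw_fini(struct amdgpu_device *adev)
    {
            amdgpu_ih_ring_fini(adev, &adev->irq.ih);
    }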
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 0e01f115bbe5..9ce8c93ec19b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -24,20 +24,8 @@
#ifndef __AMDGPU_IH_H__
#define __AMDGPU_IH_H__
-#include <linux/chash.h>
-#include "soc15_ih_clientid.h"
-
struct amdgpu_device;
-
-#define AMDGPU_IH_CLIENTID_LEGACY 0
-#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
-
-#define AMDGPU_PAGEFAULT_HASH_BITS 8
-struct amdgpu_retryfault_hashtable {
- DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
- spinlock_t lock;
- int count;
-};
+struct amdgpu_iv_entry;
/*
* R6xx+ IH ring
@@ -57,30 +45,28 @@ struct amdgpu_ih_ring {
bool use_doorbell;
bool use_bus_addr;
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
- struct amdgpu_retryfault_hashtable *faults;
};
-#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
-
-struct amdgpu_iv_entry {
- unsigned client_id;
- unsigned src_id;
- unsigned ring_id;
- unsigned vmid;
- unsigned vmid_src;
- uint64_t timestamp;
- unsigned timestamp_src;
- unsigned pasid;
- unsigned pasid_src;
- unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
- const uint32_t *iv_entry;
+/* provided by the ih block */
+struct amdgpu_ih_funcs {
+ /* ring read/write ptr handling, called from interrupt context */
+ u32 (*get_wptr)(struct amdgpu_device *adev);
+ bool (*prescreen_iv)(struct amdgpu_device *adev);
+ void (*decode_iv)(struct amdgpu_device *adev,
+ struct amdgpu_iv_entry *entry);
+ void (*set_rptr)(struct amdgpu_device *adev);
};
-int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
- bool use_bus_addr);
-void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
-int amdgpu_ih_process(struct amdgpu_device *adev);
-int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
-void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
+#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
+#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
+#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
+#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
+
+int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ unsigned ring_size, bool use_bus_addr);
+void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
+int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
+ void (*callback)(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih));
#endif
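
A hedged sketch of the intended wiring for the new amdgpu_ih_funcs table: an IP-specific IH block fills it in and points adev->irq.ih_funcs at it, after which the amdgpu_ih_get_wptr()/amdgpu_ih_decode_iv()/amdgpu_ih_set_rptr() macros above dispatch through it. The example_* function names are placeholders:

    static const struct amdgpu_ih_funcs example_ih_funcs = {
            .get_wptr = example_ih_get_wptr,
            .prescreen_iv = example_ih_prescreen_iv,
            .decode_iv = example_ih_decode_iv,
            .set_rptr = example_ih_set_rptr,
    };

    static void example_ih_set_interrupt_funcs(struct amdgpu_device *adev)
    {
            adev->irq.ih_funcs = &example_ih_funcs;
    }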
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 3a5ca462abf0..52c17f6219a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -25,6 +25,23 @@
* Alex Deucher
* Jerome Glisse
*/
+
+/**
+ * DOC: Interrupt Handling
+ *
+ * Interrupts generated within GPU hardware raise interrupt requests that are
+ * passed to the amdgpu IRQ handler, which is responsible for detecting the
+ * source and type of the interrupt and dispatching the matching handlers. If
+ * handling an interrupt requires calling kernel functions that may sleep, the
+ * processing is dispatched to work handlers.
+ *
+ * If MSI functionality is not disabled by module parameter then MSI
+ * support will be enabled.
+ *
+ * For GPU interrupt sources that may be driven by another driver, IRQ domain
+ * support is used (with mapping between virtual and hardware IRQs).
+ */
+
#include <linux/irq.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
@@ -34,6 +51,7 @@
#include "atom.h"
#include "amdgpu_connectors.h"
#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
#include <linux/pm_runtime.h>
@@ -43,19 +61,21 @@
#define AMDGPU_WAIT_IDLE_TIMEOUT 200
-/*
- * Handle hotplug events outside the interrupt handler proper.
- */
/**
- * amdgpu_hotplug_work_func - display hotplug work handler
+ * amdgpu_hotplug_work_func - work handler for display hotplug event
*
- * @work: work struct
+ * @work: work struct pointer
*
- * This is the hot plug event work handler (all asics).
- * The work gets scheduled from the irq handler if there
- * was a hot plug interrupt. It walks the connector table
- * and calls the hotplug handler for each one, then sends
- * a drm hotplug event to alert userspace.
+ * This is the hotplug event work handler (all ASICs).
+ * The work gets scheduled from the IRQ handler if there
+ * was a hotplug interrupt. It walks through the connector table
+ * and calls hotplug handler for each connector. After this, it sends
+ * a DRM hotplug event to alert userspace.
+ *
+ * This design approach is required in order to defer hotplug event handling
+ * from the IRQ handler to a work handler because hotplug handler has to use
+ * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
+ * sleep).
*/
static void amdgpu_hotplug_work_func(struct work_struct *work)
{
@@ -74,24 +94,29 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
}
/**
- * amdgpu_irq_reset_work_func - execute gpu reset
+ * amdgpu_irq_reset_work_func - execute GPU reset
*
- * @work: work struct
+ * @work: work struct pointer
*
- * Execute scheduled gpu reset (cayman+).
- * This function is called when the irq handler
- * thinks we need a gpu reset.
+ * Execute scheduled GPU reset (Cayman+).
+ * This function is called when the IRQ handler thinks we need a GPU reset.
*/
static void amdgpu_irq_reset_work_func(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_gpu_recover(adev, NULL, false);
+ if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev))
+ amdgpu_device_gpu_recover(adev, NULL);
}
-/* Disable *all* interrupts */
+/**
+ * amdgpu_irq_disable_all - disable *all* interrupts
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Disable all types of interrupts from all sources.
+ */
void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
@@ -99,7 +124,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
int r;
spin_lock_irqsave(&adev->irq.lock, irqflags);
- for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
@@ -123,11 +148,43 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_handler - irq handler
+ * amdgpu_irq_callback - callback from the IH ring
+ *
+ * @adev: amdgpu device pointer
+ * @ih: amdgpu ih ring
+ *
+ * Callback from IH ring processing to handle the entry at the current position
+ * and advance the read pointer.
+ */
+static void amdgpu_irq_callback(struct amdgpu_device *adev,
+ struct amdgpu_ih_ring *ih)
+{
+ u32 ring_index = ih->rptr >> 2;
+ struct amdgpu_iv_entry entry;
+
+ /* Prescreening of high-frequency interrupts */
+ if (!amdgpu_ih_prescreen_iv(adev))
+ return;
+
+ /* Before dispatching irq to IP blocks, send it to amdkfd */
+ amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]);
+
+ entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
+ amdgpu_ih_decode_iv(adev, &entry);
+
+ amdgpu_irq_dispatch(adev, &entry);
+}
+
+/**
+ * amdgpu_irq_handler - IRQ handler
+ *
+ * @irq: IRQ number (unused)
+ * @arg: pointer to DRM device
*
- * @int irq, void *arg: args
+ * IRQ handler for the amdgpu driver (all ASICs).
*
- * This is the irq handler for the amdgpu driver (all asics).
+ * Returns:
+ * result of handling the IRQ, as defined by &irqreturn_t
*/
irqreturn_t amdgpu_irq_handler(int irq, void *arg)
{
@@ -135,25 +192,25 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
struct amdgpu_device *adev = dev->dev_private;
irqreturn_t ret;
- ret = amdgpu_ih_process(adev);
+ ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback);
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
return ret;
}
/**
- * amdgpu_msi_ok - asic specific msi checks
+ * amdgpu_msi_ok - check whether MSI functionality is enabled
*
- * @adev: amdgpu device pointer
+ * @adev: amdgpu device pointer (unused)
*
- * Handles asic specific MSI checks to determine if
- * MSIs should be enabled on a particular chip (all asics).
- * Returns true if MSIs should be enabled, false if MSIs
- * should not be enabled.
+ * Checks whether MSI functionality has been disabled via module parameter
+ * (all ASICs).
+ *
+ * Returns:
+ * *true* if MSIs are allowed to be enabled or *false* otherwise
*/
static bool amdgpu_msi_ok(struct amdgpu_device *adev)
{
- /* force MSI on */
if (amdgpu_msi == 1)
return true;
else if (amdgpu_msi == 0)
@@ -163,12 +220,15 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_init - init driver interrupt info
+ * amdgpu_irq_init - initialize interrupt handling
*
* @adev: amdgpu device pointer
*
- * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics).
- * Returns 0 for success, error for failure.
+ * Sets up work functions for hotplug and reset interrupts, enables MSI
+ * functionality, initializes vblank, hotplug and reset interrupt handling.
+ *
+ * Returns:
+ * 0 on success or error code on failure
*/
int amdgpu_irq_init(struct amdgpu_device *adev)
{
@@ -176,7 +236,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
spin_lock_init(&adev->irq.lock);
- /* enable msi */
+ /* Enable MSI if not disabled by module parameter */
adev->irq.msi_enabled = false;
if (amdgpu_msi_ok(adev)) {
@@ -189,7 +249,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
if (!amdgpu_device_has_dc_support(adev)) {
if (!adev->enable_virtual_display)
- /* Disable vblank irqs aggressively for power-saving */
+ /* Disable vblank IRQs aggressively for power-saving */
/* XXX: can this be enabled for DC? */
adev->ddev->vblank_disable_immediate = true;
@@ -197,7 +257,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
if (r)
return r;
- /* pre DCE11 */
+ /* Pre-DCE11 */
INIT_WORK(&adev->hotplug_work,
amdgpu_hotplug_work_func);
}
@@ -220,11 +280,13 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_fini - tear down driver interrupt info
+ * amdgpu_irq_fini - shut down interrupt handling
*
* @adev: amdgpu device pointer
*
- * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics).
+ * Tears down work functions for hotplug and reset interrupts, disables MSI
+ * functionality, shuts down vblank, hotplug and reset interrupt handling,
+ * turns off interrupts from all sources (all ASICs).
*/
void amdgpu_irq_fini(struct amdgpu_device *adev)
{
@@ -240,7 +302,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
cancel_work_sync(&adev->reset_work);
}
- for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
@@ -264,18 +326,23 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_add_id - register irq source
+ * amdgpu_irq_add_id - register IRQ source
*
* @adev: amdgpu device pointer
- * @src_id: source id for this source
- * @source: irq source
+ * @client_id: client id
+ * @src_id: source id
+ * @source: IRQ source pointer
+ *
+ * Registers IRQ source on a client.
*
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source)
{
- if (client_id >= AMDGPU_IH_CLIENTID_MAX)
+ if (client_id >= AMDGPU_IRQ_CLIENTID_MAX)
return -EINVAL;
if (src_id >= AMDGPU_MAX_IRQ_SRC_ID)
@@ -312,12 +379,12 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
}
/**
- * amdgpu_irq_dispatch - dispatch irq to IP blocks
+ * amdgpu_irq_dispatch - dispatch IRQ to IP blocks
*
* @adev: amdgpu device pointer
- * @entry: interrupt vector
+ * @entry: interrupt vector pointer
*
- * Dispatches the irq to the different IP blocks
+ * Dispatches IRQ to IP blocks.
*/
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
@@ -329,7 +396,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
trace_amdgpu_iv(entry);
- if (client_id >= AMDGPU_IH_CLIENTID_MAX) {
+ if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
return;
}
@@ -361,13 +428,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
}
/**
- * amdgpu_irq_update - update hw interrupt state
+ * amdgpu_irq_update - update hardware interrupt state
*
* @adev: amdgpu device pointer
- * @src: interrupt src you want to enable
- * @type: type of interrupt you want to update
+ * @src: interrupt source pointer
+ * @type: type of interrupt
*
- * Updates the interrupt state for a specific src (all asics).
+ * Updates interrupt state for the specific source (all ASICs).
*/
int amdgpu_irq_update(struct amdgpu_device *adev,
struct amdgpu_irq_src *src, unsigned type)
@@ -378,7 +445,7 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
spin_lock_irqsave(&adev->irq.lock, irqflags);
- /* we need to determine after taking the lock, otherwise
+	/* We need to determine the state after taking the lock; otherwise
we might disable just enabled interrupts again */
if (amdgpu_irq_enabled(adev, src, type))
state = AMDGPU_IRQ_STATE_ENABLE;
@@ -390,11 +457,19 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
return r;
}
+/**
+ * amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Updates state of all types of interrupts on all sources on resume after
+ * reset.
+ */
void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
int i, j, k;
- for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) {
+ for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) {
if (!adev->irq.client[i].sources)
continue;
@@ -413,10 +488,13 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
* amdgpu_irq_get - enable interrupt
*
* @adev: amdgpu device pointer
- * @src: interrupt src you want to enable
- * @type: type of interrupt you want to enable
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Enables specified type of interrupt on the specified source (all ASICs).
*
- * Enables the interrupt type for a specific src (all asics).
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@@ -440,10 +518,13 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* amdgpu_irq_put - disable interrupt
*
* @adev: amdgpu device pointer
- * @src: interrupt src you want to disable
- * @type: type of interrupt you want to disable
+ * @src: interrupt source pointer
+ * @type: type of interrupt
*
- * Disables the interrupt type for a specific src (all asics).
+ * Disables specified type of interrupt on the specified source (all ASICs).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@@ -464,12 +545,17 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
}
/**
- * amdgpu_irq_enabled - test if irq is enabled or not
+ * amdgpu_irq_enabled - check whether interrupt is enabled or not
*
* @adev: amdgpu device pointer
- * @idx: interrupt src you want to test
+ * @src: interrupt source pointer
+ * @type: type of interrupt
+ *
+ * Checks whether the given type of interrupt is enabled on the given source.
*
- * Tests if the given interrupt source is enabled or not
+ * Returns:
+ * *true* if interrupt is enabled, *false* if interrupt is disabled or on
+ * invalid parameters
*/
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@@ -486,7 +572,7 @@ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
return !!atomic_read(&src->enabled_types[type]);
}
-/* gen irq */
+/* XXX: Generic IRQ handling */
static void amdgpu_irq_mask(struct irq_data *irqd)
{
/* XXX */
@@ -497,12 +583,26 @@ static void amdgpu_irq_unmask(struct irq_data *irqd)
/* XXX */
}
+/* amdgpu hardware interrupt chip descriptor */
static struct irq_chip amdgpu_irq_chip = {
.name = "amdgpu-ih",
.irq_mask = amdgpu_irq_mask,
.irq_unmask = amdgpu_irq_unmask,
};
+/**
+ * amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers
+ *
+ * @d: amdgpu IRQ domain pointer (unused)
+ * @irq: virtual IRQ number
+ * @hwirq: hardware irq number
+ *
+ * The current implementation assigns a simple interrupt handler to the given
+ * virtual IRQ.
+ *
+ * Returns:
+ * 0 on success or error code otherwise
+ */
static int amdgpu_irqdomain_map(struct irq_domain *d,
unsigned int irq, irq_hw_number_t hwirq)
{
@@ -514,17 +614,21 @@ static int amdgpu_irqdomain_map(struct irq_domain *d,
return 0;
}
+/* Implementation of methods for amdgpu IRQ domain */
static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
.map = amdgpu_irqdomain_map,
};
/**
- * amdgpu_irq_add_domain - create a linear irq domain
+ * amdgpu_irq_add_domain - create a linear IRQ domain
*
* @adev: amdgpu device pointer
*
- * Create an irq domain for GPU interrupt sources
+ * Creates an IRQ domain for GPU interrupt sources
* that may be driven by another driver (e.g., ACP).
+ *
+ * Returns:
+ * 0 on success or error code otherwise
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
@@ -539,11 +643,11 @@ int amdgpu_irq_add_domain(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_remove_domain - remove the irq domain
+ * amdgpu_irq_remove_domain - remove the IRQ domain
*
* @adev: amdgpu device pointer
*
- * Remove the irq domain for GPU interrupt sources
+ * Removes the IRQ domain for GPU interrupt sources
* that may be driven by another driver (e.g., ACP).
*/
void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
@@ -555,16 +659,17 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_create_mapping - create a mapping between a domain irq and a
- * Linux irq
+ * amdgpu_irq_create_mapping - create a mapping between a domain and a Linux IRQ
*
* @adev: amdgpu device pointer
* @src_id: IH source id
*
- * Create a mapping between a domain irq (GPU IH src id) and a Linux irq
+ * Creates a mapping between a domain IRQ (GPU IH src id) and a Linux IRQ.
* Use this for components that generate a GPU interrupt, but are driven
* by a different driver (e.g., ACP).
- * Returns the Linux irq.
+ *
+ * Returns:
+ * Linux IRQ
*/
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 3375ad778edc..f6ce171cb8aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -25,19 +25,38 @@
#define __AMDGPU_IRQ_H__
#include <linux/irqdomain.h>
+#include "soc15_ih_clientid.h"
#include "amdgpu_ih.h"
-#define AMDGPU_MAX_IRQ_SRC_ID 0x100
+#define AMDGPU_MAX_IRQ_SRC_ID 0x100
#define AMDGPU_MAX_IRQ_CLIENT_ID 0x100
+#define AMDGPU_IRQ_CLIENTID_LEGACY 0
+#define AMDGPU_IRQ_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
+
+#define AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW 4
+
struct amdgpu_device;
-struct amdgpu_iv_entry;
enum amdgpu_interrupt_state {
AMDGPU_IRQ_STATE_DISABLE,
AMDGPU_IRQ_STATE_ENABLE,
};
+struct amdgpu_iv_entry {
+ unsigned client_id;
+ unsigned src_id;
+ unsigned ring_id;
+ unsigned vmid;
+ unsigned vmid_src;
+ uint64_t timestamp;
+ unsigned timestamp_src;
+ unsigned pasid;
+ unsigned pasid_src;
+ unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW];
+ const uint32_t *iv_entry;
+};
+
struct amdgpu_irq_src {
unsigned num_types;
atomic_t *enabled_types;
@@ -63,7 +82,7 @@ struct amdgpu_irq {
bool installed;
spinlock_t lock;
/* interrupt sources */
- struct amdgpu_irq_client client[AMDGPU_IH_CLIENTID_MAX];
+ struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX];
/* status, etc. */
bool msi_enabled; /* msi enabled */
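
A hedged sketch of how an IP block typically registers an interrupt source against the renamed legacy client id; the example_* identifiers are placeholders rather than code from this series:

    static const struct amdgpu_irq_src_funcs example_irq_funcs = {
            .set = example_set_irq_state,       /* enable/disable in hardware */
            .process = example_process_irq,     /* handle a decoded IV entry */
    };

    static int example_irq_sw_init(struct amdgpu_device *adev,
                                   struct amdgpu_irq_src *src,
                                   unsigned src_id)
    {
            src->num_types = 1;
            src->funcs = &example_irq_funcs;
            return amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY,
                                     src_id, src);
    }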
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 2bd56760c744..755f733bf0d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -30,14 +30,21 @@
static void amdgpu_job_timedout(struct drm_sched_job *s_job)
{
- struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
- DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
- job->base.sched->name,
- atomic_read(&job->ring->fence_drv.last_seq),
- job->ring->fence_drv.sync_seq);
+ if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
+ DRM_ERROR("ring %s timeout, but soft recovered\n",
+ s_job->sched->name);
+ return;
+ }
+
+ DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
+ job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
+ ring->fence_drv.sync_seq);
- amdgpu_device_gpu_recover(job->adev, job, false);
+ if (amdgpu_device_should_recover_gpu(ring->adev))
+ amdgpu_device_gpu_recover(ring->adev, job);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -54,7 +61,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
if (!*job)
return -ENOMEM;
- (*job)->adev = adev;
+ /*
+ * Initialize the scheduler to at least some ring so that we always
+ * have a pointer to adev.
+ */
+ (*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
@@ -62,6 +73,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->sched_sync);
(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+ (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
return 0;
}
@@ -78,14 +90,13 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
if (r)
kfree(*job);
- else
- (*job)->vm_pd_addr = adev->gart.table_addr;
return r;
}
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
+ struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
@@ -93,14 +104,15 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(job->adev, &job->ibs[i], f);
+ amdgpu_ib_free(ring->adev, &job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
{
- struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
+ struct amdgpu_job *job = to_amdgpu_job(s_job);
- amdgpu_ring_priority_put(job->ring, s_job->s_priority);
+ amdgpu_ring_priority_put(ring, s_job->s_priority);
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
@@ -117,50 +129,68 @@ void amdgpu_job_free(struct amdgpu_job *job)
kfree(job);
}
-int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
- struct drm_sched_entity *entity, void *owner,
- struct dma_fence **f)
+int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
+ void *owner, struct dma_fence **f)
{
+ enum drm_sched_priority priority;
+ struct amdgpu_ring *ring;
int r;
- job->ring = ring;
if (!f)
return -EINVAL;
- r = drm_sched_job_init(&job->base, &ring->sched, entity, owner);
+ r = drm_sched_job_init(&job->base, entity, owner);
if (r)
return r;
job->owner = owner;
- job->fence_ctx = entity->fence_context;
*f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
- amdgpu_ring_priority_get(job->ring, job->base.s_priority);
+ priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
+ ring = to_amdgpu_ring(entity->rq->sched);
+ amdgpu_ring_priority_get(ring, priority);
+
+ return 0;
+}
+
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence)
+{
+ int r;
+
+ job->base.sched = &ring->sched;
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
+ job->fence = dma_fence_get(*fence);
+ if (r)
+ return r;
+
+ amdgpu_job_free(job);
return 0;
}
static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
struct drm_sched_entity *s_entity)
{
+ struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
+ struct dma_fence *fence;
bool explicit = false;
int r;
- struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit);
+ fence = amdgpu_sync_get_fence(&job->sync, &explicit);
if (fence && explicit) {
if (drm_sched_dependency_optimized(fence, s_entity)) {
- r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false);
+ r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
+ fence, false);
if (r)
- DRM_ERROR("Error adding fence to sync (%d)\n", r);
+ DRM_ERROR("Error adding fence (%d)\n", r);
}
}
while (fence == NULL && vm && !job->vmid) {
- struct amdgpu_ring *ring = job->ring;
-
r = amdgpu_vmid_grab(vm, ring, &job->sync,
&job->base.s_fence->finished,
job);
@@ -175,30 +205,25 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
{
+ struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
- struct amdgpu_device *adev;
struct amdgpu_job *job;
int r;
- if (!sched_job) {
- DRM_ERROR("job is null\n");
- return NULL;
- }
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
- adev = job->adev;
BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
trace_amdgpu_sched_run_job(job);
- if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
+ if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
if (finished->error < 0) {
DRM_INFO("Skip scheduling IBs!\n");
} else {
- r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job,
+ r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
new file mode 100644
index 000000000000..57cfe78a262b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_JOB_H__
+#define __AMDGPU_JOB_H__
+
+/* bit set means command submit involves a preamble IB */
+#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
+/* bit set means preamble IB is first presented in belonging context */
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)
+/* bit set means context switch occurred */
+#define AMDGPU_HAVE_CTX_SWITCH (1 << 2)
+
+#define to_amdgpu_job(sched_job) \
+ container_of((sched_job), struct amdgpu_job, base)
+
+struct amdgpu_fence;
+
+struct amdgpu_job {
+ struct drm_sched_job base;
+ struct amdgpu_vm *vm;
+ struct amdgpu_sync sync;
+ struct amdgpu_sync sched_sync;
+ struct amdgpu_ib *ibs;
+ struct dma_fence *fence; /* the hw fence */
+ uint32_t preamble_status;
+ uint32_t num_ibs;
+ void *owner;
+ bool vm_needs_flush;
+ uint64_t vm_pd_addr;
+ unsigned vmid;
+ unsigned pasid;
+ uint32_t gds_base, gds_size;
+ uint32_t gws_base, gws_size;
+ uint32_t oa_base, oa_size;
+ uint32_t vram_lost_counter;
+
+ /* user fence handling */
+ uint64_t uf_addr;
+ uint64_t uf_sequence;
+
+};
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+ struct amdgpu_job **job, struct amdgpu_vm *vm);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+ struct amdgpu_job **job);
+
+void amdgpu_job_free_resources(struct amdgpu_job *job);
+void amdgpu_job_free(struct amdgpu_job *job);
+int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
+ void *owner, struct dma_fence **f);
+int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct dma_fence **fence);
+#endif
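
A hedged sketch of the reworked submission flow exposed by this header: the ring is no longer stored in the job but derived from the scheduler entity at push time, while amdgpu_job_submit_direct() keeps a path that bypasses the scheduler (used for things like IB tests). adev and entity are assumed to be in scope and error handling is simplified:

    struct amdgpu_job *job;
    struct dma_fence *fence;
    int r;

    r = amdgpu_job_alloc_with_ib(adev, 64, &job);
    if (r)
            return r;

    /* ... fill job->ibs[0] with packets ... */

    r = amdgpu_job_submit(job, &entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
    if (r) {
            amdgpu_job_free(job);
            return r;
    }
    dma_fence_put(fence);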
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 91517b166a3b..81732a84c2ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -37,6 +37,32 @@
#include <linux/slab.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
+#include "amdgpu_display.h"
+
+static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+ int i;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ for (i = 0; i < mgpu_info.num_gpu; i++) {
+ gpu_instance = &(mgpu_info.gpu_ins[i]);
+ if (gpu_instance->adev == adev) {
+ mgpu_info.gpu_ins[i] =
+ mgpu_info.gpu_ins[mgpu_info.num_gpu - 1];
+ mgpu_info.num_gpu--;
+ if (adev->flags & AMD_IS_APU)
+ mgpu_info.num_apu--;
+ else
+ mgpu_info.num_dgpu--;
+ break;
+ }
+ }
+
+ mutex_unlock(&mgpu_info.mutex);
+}
/**
* amdgpu_driver_unload_kms - Main unload function for KMS.
@@ -53,6 +79,8 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
if (adev == NULL)
return;
+ amdgpu_unregister_gpu_instance(adev);
+
if (adev->rmmio == NULL)
goto done_free;
@@ -73,6 +101,31 @@ done_free:
dev->dev_private = NULL;
}
+static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+{
+ struct amdgpu_gpu_instance *gpu_instance;
+
+ mutex_lock(&mgpu_info.mutex);
+
+ if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) {
+ DRM_ERROR("Cannot register more gpu instance\n");
+ mutex_unlock(&mgpu_info.mutex);
+ return;
+ }
+
+ gpu_instance = &(mgpu_info.gpu_ins[mgpu_info.num_gpu]);
+ gpu_instance->adev = adev;
+ gpu_instance->mgpu_fan_enabled = 0;
+
+ mgpu_info.num_gpu++;
+ if (adev->flags & AMD_IS_APU)
+ mgpu_info.num_apu++;
+ else
+ mgpu_info.num_dgpu++;
+
+ mutex_unlock(&mgpu_info.mutex);
+}
+
/**
* amdgpu_driver_load_kms - Main load function for KMS.
*
@@ -167,6 +220,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
pm_runtime_put_autosuspend(dev->dev);
}
+ amdgpu_register_gpu_instance(adev);
out:
if (r) {
/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
@@ -255,12 +309,133 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->psp.asd_fw_version;
fw_info->feature = adev->psp.asd_feature_version;
break;
+ case AMDGPU_INFO_FW_DMCU:
+ fw_info->ver = adev->dm.dmcu_fw_version;
+ fw_info->feature = 0;
+ break;
default:
return -EINVAL;
}
return 0;
}
+static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
+ struct drm_amdgpu_info *info,
+ struct drm_amdgpu_info_hw_ip *result)
+{
+ uint32_t ib_start_alignment = 0;
+ uint32_t ib_size_alignment = 0;
+ enum amd_ip_block_type type;
+ unsigned int num_rings = 0;
+ unsigned int i, j;
+
+ if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
+ return -EINVAL;
+
+ switch (info->query_hw_ip.type) {
+ case AMDGPU_HW_IP_GFX:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++)
+ if (adev->gfx.gfx_ring[i].ready)
+ ++num_rings;
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
+ break;
+ case AMDGPU_HW_IP_COMPUTE:
+ type = AMD_IP_BLOCK_TYPE_GFX;
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ if (adev->gfx.compute_ring[i].ready)
+ ++num_rings;
+ ib_start_alignment = 32;
+ ib_size_alignment = 32;
+ break;
+ case AMDGPU_HW_IP_DMA:
+ type = AMD_IP_BLOCK_TYPE_SDMA;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ if (adev->sdma.instance[i].ring.ready)
+ ++num_rings;
+ ib_start_alignment = 256;
+ ib_size_alignment = 4;
+ break;
+ case AMDGPU_HW_IP_UVD:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ if (adev->uvd.inst[i].ring.ready)
+ ++num_rings;
+ }
+ ib_start_alignment = 64;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VCE:
+ type = AMD_IP_BLOCK_TYPE_VCE;
+ for (i = 0; i < adev->vce.num_rings; i++)
+ if (adev->vce.ring[i].ready)
+ ++num_rings;
+ ib_start_alignment = 4;
+ ib_size_alignment = 1;
+ break;
+ case AMDGPU_HW_IP_UVD_ENC:
+ type = AMD_IP_BLOCK_TYPE_UVD;
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
+
+ for (j = 0; j < adev->uvd.num_enc_rings; j++)
+ if (adev->uvd.inst[i].ring_enc[j].ready)
+ ++num_rings;
+ }
+ ib_start_alignment = 64;
+ ib_size_alignment = 64;
+ break;
+ case AMDGPU_HW_IP_VCN_DEC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ if (adev->vcn.ring_dec.ready)
+ ++num_rings;
+ ib_start_alignment = 16;
+ ib_size_alignment = 16;
+ break;
+ case AMDGPU_HW_IP_VCN_ENC:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ for (i = 0; i < adev->vcn.num_enc_rings; i++)
+ if (adev->vcn.ring_enc[i].ready)
+ ++num_rings;
+ ib_start_alignment = 64;
+ ib_size_alignment = 1;
+ break;
+ case AMDGPU_HW_IP_VCN_JPEG:
+ type = AMD_IP_BLOCK_TYPE_VCN;
+ if (adev->vcn.ring_jpeg.ready)
+ ++num_rings;
+ ib_start_alignment = 16;
+ ib_size_alignment = 16;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < adev->num_ip_blocks; i++)
+ if (adev->ip_blocks[i].version->type == type &&
+ adev->ip_blocks[i].status.valid)
+ break;
+
+ if (i == adev->num_ip_blocks)
+ return 0;
+
+ num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
+ num_rings);
+
+ result->hw_ip_version_major = adev->ip_blocks[i].version->major;
+ result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+ result->capabilities_flags = 0;
+ result->available_rings = (1 << num_rings) - 1;
+ result->ib_start_alignment = ib_start_alignment;
+ result->ib_size_alignment = ib_size_alignment;
+ return 0;
+}
+
/*
* Userspace get information ioctl
*/
@@ -286,7 +461,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_crtc *crtc;
uint32_t ui32 = 0;
uint64_t ui64 = 0;
- int i, j, found;
+ int i, found;
int ui32_size = sizeof(ui32);
if (!info->return_size || !info->return_pointer)
@@ -316,91 +491,14 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
case AMDGPU_INFO_HW_IP_INFO: {
struct drm_amdgpu_info_hw_ip ip = {};
- enum amd_ip_block_type type;
- uint32_t ring_mask = 0;
- uint32_t ib_start_alignment = 0;
- uint32_t ib_size_alignment = 0;
-
- if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
- return -EINVAL;
+ int ret;
- switch (info->query_hw_ip.type) {
- case AMDGPU_HW_IP_GFX:
- type = AMD_IP_BLOCK_TYPE_GFX;
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- ring_mask |= ((adev->gfx.gfx_ring[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 8;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- type = AMD_IP_BLOCK_TYPE_GFX;
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- ring_mask |= ((adev->gfx.compute_ring[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 8;
- break;
- case AMDGPU_HW_IP_DMA:
- type = AMD_IP_BLOCK_TYPE_SDMA;
- for (i = 0; i < adev->sdma.num_instances; i++)
- ring_mask |= ((adev->sdma.instance[i].ring.ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 1;
- break;
- case AMDGPU_HW_IP_UVD:
- type = AMD_IP_BLOCK_TYPE_UVD;
- for (i = 0; i < adev->uvd.num_uvd_inst; i++)
- ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 16;
- break;
- case AMDGPU_HW_IP_VCE:
- type = AMD_IP_BLOCK_TYPE_VCE;
- for (i = 0; i < adev->vce.num_rings; i++)
- ring_mask |= ((adev->vce.ring[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 1;
- break;
- case AMDGPU_HW_IP_UVD_ENC:
- type = AMD_IP_BLOCK_TYPE_UVD;
- for (i = 0; i < adev->uvd.num_uvd_inst; i++)
- for (j = 0; j < adev->uvd.num_enc_rings; j++)
- ring_mask |=
- ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
- (j + i * adev->uvd.num_enc_rings));
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 1;
- break;
- case AMDGPU_HW_IP_VCN_DEC:
- type = AMD_IP_BLOCK_TYPE_VCN;
- ring_mask = adev->vcn.ring_dec.ready ? 1 : 0;
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 16;
- break;
- case AMDGPU_HW_IP_VCN_ENC:
- type = AMD_IP_BLOCK_TYPE_VCN;
- for (i = 0; i < adev->vcn.num_enc_rings; i++)
- ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i);
- ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
- ib_size_alignment = 1;
- break;
- default:
- return -EINVAL;
- }
+ ret = amdgpu_hw_ip_info(adev, info, &ip);
+ if (ret)
+ return ret;
- for (i = 0; i < adev->num_ip_blocks; i++) {
- if (adev->ip_blocks[i].version->type == type &&
- adev->ip_blocks[i].status.valid) {
- ip.hw_ip_version_major = adev->ip_blocks[i].version->major;
- ip.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
- ip.capabilities_flags = 0;
- ip.available_rings = ring_mask;
- ip.ib_start_alignment = ib_start_alignment;
- ip.ib_size_alignment = ib_size_alignment;
- break;
- }
- }
- return copy_to_user(out, &ip,
- min((size_t)size, sizeof(ip))) ? -EFAULT : 0;
+ ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip)));
+ return ret ? -EFAULT : 0;
}
case AMDGPU_INFO_HW_IP_COUNT: {
enum amd_ip_block_type type;
@@ -427,6 +525,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
+ case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
break;
default:
@@ -481,26 +580,26 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_gds gds_info;
memset(&gds_info, 0, sizeof(gds_info));
- gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
- gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
- gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT;
- gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
- gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
- gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
- gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
+ gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
+ gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
+ gds_info.gds_total_size = adev->gds.mem.total_size;
+ gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
+ gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
+ gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
+ gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
return copy_to_user(out, &gds_info,
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
}
case AMDGPU_INFO_VRAM_GTT: {
struct drm_amdgpu_info_vram_gtt vram_gtt;
- vram_gtt.vram_size = adev->gmc.real_vram_size;
- vram_gtt.vram_size -= adev->vram_pin_size;
- vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size;
- vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
+ vram_gtt.vram_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size);
+ vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size);
vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
vram_gtt.gtt_size *= PAGE_SIZE;
- vram_gtt.gtt_size -= adev->gart_pin_size;
+ vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
}
@@ -509,17 +608,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->gmc.real_vram_size;
- mem.vram.usable_heap_size =
- adev->gmc.real_vram_size - adev->vram_pin_size;
+ mem.vram.usable_heap_size = adev->gmc.real_vram_size -
+ atomic64_read(&adev->vram_pin_size);
mem.vram.heap_usage =
amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
adev->gmc.visible_vram_size;
- mem.cpu_accessible_vram.usable_heap_size =
- adev->gmc.visible_vram_size -
- (adev->vram_pin_size - adev->invisible_pin_size);
+ mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size -
+ atomic64_read(&adev->visible_pin_size);
mem.cpu_accessible_vram.heap_usage =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.cpu_accessible_vram.max_allocation =
@@ -527,8 +625,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size;
mem.gtt.total_heap_size *= PAGE_SIZE;
- mem.gtt.usable_heap_size = mem.gtt.total_heap_size
- - adev->gart_pin_size;
+ mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
+ atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage =
amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
@@ -607,16 +705,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
vm_size -= AMDGPU_VA_RESERVED_SIZE;
/* Older VCE FW versions are buggy and can handle only 40bits */
- if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
+ if (adev->vce.fw_version &&
+ adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
vm_size = min(vm_size, 1ULL << 40);
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max =
- min(vm_size, AMDGPU_VA_HOLE_START);
+ min(vm_size, AMDGPU_GMC_HOLE_START);
- if (vm_size > AMDGPU_VA_HOLE_START) {
- dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
- dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
+ if (vm_size > AMDGPU_GMC_HOLE_START) {
+ dev_info.high_va_offset = AMDGPU_GMC_HOLE_END;
+ dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
}
dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
@@ -930,12 +1029,11 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
return;
pm_runtime_get_sync(dev->dev);
- amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
- if (adev->asic_type != CHIP_RAVEN) {
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
amdgpu_uvd_free_handles(adev, file_priv);
+ if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
amdgpu_vce_free_handles(adev, file_priv);
- }
amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
@@ -958,7 +1056,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
amdgpu_bo_unref(&pd);
idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
- amdgpu_bo_list_free(list);
+ amdgpu_bo_list_put(list);
idr_destroy(&fpriv->bo_list_handles);
mutex_destroy(&fpriv->bo_list_lock);
@@ -1253,6 +1351,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
fw_info.feature, fw_info.ver);
+ /* DMCU */
+ query_fw.fw_type = AMDGPU_INFO_FW_DMCU;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 83e344fbb50a..e55508b39496 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -28,6 +28,21 @@
* Christian König <[email protected]>
*/
+/**
+ * DOC: MMU Notifier
+ *
+ * For coherent userptr handling the driver registers an MMU notifier to be
+ * informed about updates to the page tables of a process.
+ *
+ * When somebody tries to invalidate the page tables, we block the update until
+ * all operations on the pages in question are completed; those pages are then
+ * marked as accessed, and also as dirty if the access wasn't read only.
+ *
+ * New command submissions using the userptrs in question are delayed until all
+ * page table invalidations are completed and we once more see a coherent
+ * process address space.
+ */
+
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/mmu_notifier.h>
@@ -38,6 +53,22 @@
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+/**
+ * struct amdgpu_mn
+ *
+ * @adev: amdgpu device pointer
+ * @mm: process address space
+ * @mn: MMU notifier structure
+ * @type: type of MMU notifier
+ * @work: destruction work item
+ * @node: hash table node to find structure by adev and mn
+ * @lock: rw semaphore protecting the notifier nodes
+ * @objects: interval tree containing amdgpu_mn_nodes
+ * @read_lock: mutex for recursive locking of @lock
+ * @recursion: depth of recursion
+ *
+ * Data for each amdgpu device and process address space.
+ */
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
@@ -58,13 +89,21 @@ struct amdgpu_mn {
atomic_t recursion;
};
+/**
+ * struct amdgpu_mn_node
+ *
+ * @it: interval node defining start-last of the affected address range
+ * @bos: list of all BOs in the affected address range
+ *
+ * Manages all BOs which are affected by a certain range of address space.
+ */
struct amdgpu_mn_node {
struct interval_tree_node it;
struct list_head bos;
};
/**
- * amdgpu_mn_destroy - destroy the rmn
+ * amdgpu_mn_destroy - destroy the MMU notifier
*
 * @work: previously scheduled work item
*
@@ -72,47 +111,50 @@ struct amdgpu_mn_node {
*/
static void amdgpu_mn_destroy(struct work_struct *work)
{
- struct amdgpu_mn *rmn = container_of(work, struct amdgpu_mn, work);
- struct amdgpu_device *adev = rmn->adev;
+ struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
+ struct amdgpu_device *adev = amn->adev;
struct amdgpu_mn_node *node, *next_node;
struct amdgpu_bo *bo, *next_bo;
mutex_lock(&adev->mn_lock);
- down_write(&rmn->lock);
- hash_del(&rmn->node);
+ down_write(&amn->lock);
+ hash_del(&amn->node);
rbtree_postorder_for_each_entry_safe(node, next_node,
- &rmn->objects.rb_root, it.rb) {
+ &amn->objects.rb_root, it.rb) {
list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
bo->mn = NULL;
list_del_init(&bo->mn_list);
}
kfree(node);
}
- up_write(&rmn->lock);
+ up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
- kfree(rmn);
+ mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+ kfree(amn);
}
/**
* amdgpu_mn_release - callback to notify about mm destruction
*
* @mn: our notifier
- * @mn: the mm this callback is about
+ * @mm: the mm this callback is about
*
* Schedule a work item to lazily destroy our notifier.
*/
static void amdgpu_mn_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
- INIT_WORK(&rmn->work, amdgpu_mn_destroy);
- schedule_work(&rmn->work);
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+
+ INIT_WORK(&amn->work, amdgpu_mn_destroy);
+ schedule_work(&amn->work);
}
/**
- * amdgpu_mn_lock - take the write side lock for this mn
+ * amdgpu_mn_lock - take the write side lock for this notifier
+ *
+ * @mn: our notifier
*/
void amdgpu_mn_lock(struct amdgpu_mn *mn)
{
@@ -121,7 +163,9 @@ void amdgpu_mn_lock(struct amdgpu_mn *mn)
}
/**
- * amdgpu_mn_unlock - drop the write side lock for this mn
+ * amdgpu_mn_unlock - drop the write side lock for this notifier
+ *
+ * @mn: our notifier
*/
void amdgpu_mn_unlock(struct amdgpu_mn *mn)
{
@@ -130,40 +174,44 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
}
/**
- * amdgpu_mn_read_lock - take the rmn read lock
+ * amdgpu_mn_read_lock - take the read side lock for this notifier
*
- * @rmn: our notifier
- *
- * Take the rmn read side lock.
+ * @amn: our notifier
*/
-static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn)
+static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
- mutex_lock(&rmn->read_lock);
- if (atomic_inc_return(&rmn->recursion) == 1)
- down_read_non_owner(&rmn->lock);
- mutex_unlock(&rmn->read_lock);
+ if (blockable)
+ mutex_lock(&amn->read_lock);
+ else if (!mutex_trylock(&amn->read_lock))
+ return -EAGAIN;
+
+ if (atomic_inc_return(&amn->recursion) == 1)
+ down_read_non_owner(&amn->lock);
+ mutex_unlock(&amn->read_lock);
+
+ return 0;
}
/**
- * amdgpu_mn_read_unlock - drop the rmn read lock
+ * amdgpu_mn_read_unlock - drop the read side lock for this notifier
*
- * @rmn: our notifier
- *
- * Drop the rmn read side lock.
+ * @amn: our notifier
*/
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn)
+static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
- if (atomic_dec_return(&rmn->recursion) == 0)
- up_read_non_owner(&rmn->lock);
+ if (atomic_dec_return(&amn->recursion) == 0)
+ up_read_non_owner(&amn->lock);
}
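The recursion counting above — only the first reader takes the shared lock, only the last one drops it, with read_lock serializing the entry path — can be illustrated with a standalone userspace analogue. This is a rough sketch using POSIX primitives (there is no pthread equivalent of down_read_non_owner(), so the analogy is approximate):

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t read_lock = PTHREAD_MUTEX_INITIALIZER; /* serializes lock entry */
static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;    /* the rwsem analogue */
static atomic_int recursion;                                  /* nesting depth */

static int example_read_lock(int blockable)
{
	if (blockable)
		pthread_mutex_lock(&read_lock);
	else if (pthread_mutex_trylock(&read_lock))
		return -1;	/* would block: caller must retry, like -EAGAIN */

	/* only the first reader takes the shared lock */
	if (atomic_fetch_add(&recursion, 1) == 0)
		pthread_rwlock_rdlock(&lock);
	pthread_mutex_unlock(&read_lock);
	return 0;
}

static void example_read_unlock(void)
{
	/* only the last reader drops the shared lock */
	if (atomic_fetch_sub(&recursion, 1) == 1)
		pthread_rwlock_unlock(&lock);
}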
/**
* amdgpu_mn_invalidate_node - unmap all BOs of a node
*
* @node: the node with the BOs to unmap
+ * @start: start of address range affected
+ * @end: end of address range affected
*
- * We block for all BOs and unmap them by move them
- * into system domain again.
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
*/
static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
unsigned long start,
@@ -190,42 +238,54 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
- * @mn: the mm this callback is about
+ * @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
- * We block for all BOs between start and end to be idle and
- * unmap them by move them into system domain again.
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
*/
-static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
- amdgpu_mn_read_lock(rmn);
+ /* TODO we should be able to split locking for interval tree and
+ * amdgpu_mn_invalidate_node
+ */
+ if (amdgpu_mn_read_lock(amn, blockable))
+ return -EAGAIN;
- it = interval_tree_iter_first(&rmn->objects, start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
+ if (!blockable) {
+ amdgpu_mn_read_unlock(amn);
+ return -EAGAIN;
+ }
+
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
amdgpu_mn_invalidate_node(node, start, end);
}
+
+ return 0;
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
* @mn: our notifier
- * @mn: the mm this callback is about
+ * @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
@@ -233,24 +293,31 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
* necessitates evicting all user-mode queues of the process. The BOs
* are restored in amdgpu_mn_invalidate_range_end_hsa.
*/
-static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
- amdgpu_mn_read_lock(rmn);
+ if (amdgpu_mn_read_lock(amn, blockable))
+ return -EAGAIN;
- it = interval_tree_iter_first(&rmn->objects, start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
+ if (!blockable) {
+ amdgpu_mn_read_unlock(amn);
+ return -EAGAIN;
+ }
+
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
@@ -262,13 +329,15 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
amdgpu_amdkfd_evict_userptr(mem, mm);
}
}
+
+ return 0;
}
/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
* @mn: our notifier
- * @mn: the mm this callback is about
+ * @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
@@ -279,9 +348,9 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
- struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
- amdgpu_mn_read_unlock(rmn);
+ amdgpu_mn_read_unlock(amn);
}
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
@@ -315,7 +384,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
- struct amdgpu_mn *rmn;
+ struct amdgpu_mn *amn;
unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
@@ -325,41 +394,41 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
return ERR_PTR(-EINTR);
}
- hash_for_each_possible(adev->mn_hash, rmn, node, key)
- if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
+ hash_for_each_possible(adev->mn_hash, amn, node, key)
+ if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
goto release_locks;
- rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
- if (!rmn) {
- rmn = ERR_PTR(-ENOMEM);
+ amn = kzalloc(sizeof(*amn), GFP_KERNEL);
+ if (!amn) {
+ amn = ERR_PTR(-ENOMEM);
goto release_locks;
}
- rmn->adev = adev;
- rmn->mm = mm;
- init_rwsem(&rmn->lock);
- rmn->type = type;
- rmn->mn.ops = &amdgpu_mn_ops[type];
- rmn->objects = RB_ROOT_CACHED;
- mutex_init(&rmn->read_lock);
- atomic_set(&rmn->recursion, 0);
+ amn->adev = adev;
+ amn->mm = mm;
+ init_rwsem(&amn->lock);
+ amn->type = type;
+ amn->mn.ops = &amdgpu_mn_ops[type];
+ amn->objects = RB_ROOT_CACHED;
+ mutex_init(&amn->read_lock);
+ atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&rmn->mn, mm);
+ r = __mmu_notifier_register(&amn->mn, mm);
if (r)
- goto free_rmn;
+ goto free_amn;
- hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
+ hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
- return rmn;
+ return amn;
-free_rmn:
+free_amn:
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
- kfree(rmn);
+ kfree(amn);
return ERR_PTR(r);
}
@@ -379,14 +448,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
enum amdgpu_mn_type type =
bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
- struct amdgpu_mn *rmn;
+ struct amdgpu_mn *amn;
struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
- rmn = amdgpu_mn_get(adev, type);
- if (IS_ERR(rmn))
- return PTR_ERR(rmn);
+ amn = amdgpu_mn_get(adev, type);
+ if (IS_ERR(amn))
+ return PTR_ERR(amn);
new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
if (!new_node)
@@ -394,12 +463,12 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
INIT_LIST_HEAD(&bos);
- down_write(&rmn->lock);
+ down_write(&amn->lock);
- while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
+ while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
kfree(node);
node = container_of(it, struct amdgpu_mn_node, it);
- interval_tree_remove(&node->it, &rmn->objects);
+ interval_tree_remove(&node->it, &amn->objects);
addr = min(it->start, addr);
end = max(it->last, end);
list_splice(&node->bos, &bos);
@@ -410,7 +479,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
else
kfree(new_node);
- bo->mn = rmn;
+ bo->mn = amn;
node->it.start = addr;
node->it.last = end;
@@ -418,9 +487,9 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&bos, &node->bos);
list_add(&bo->mn_list, &node->bos);
- interval_tree_insert(&node->it, &rmn->objects);
+ interval_tree_insert(&node->it, &amn->objects);
- up_write(&rmn->lock);
+ up_write(&amn->lock);
return 0;
}
@@ -435,18 +504,18 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *rmn;
+ struct amdgpu_mn *amn;
struct list_head *head;
mutex_lock(&adev->mn_lock);
- rmn = bo->mn;
- if (rmn == NULL) {
+ amn = bo->mn;
+ if (amn == NULL) {
mutex_unlock(&adev->mn_lock);
return;
}
- down_write(&rmn->lock);
+ down_write(&amn->lock);
/* save the next list entry for later */
head = bo->mn_list.next;
@@ -456,12 +525,13 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
if (list_empty(head)) {
struct amdgpu_mn_node *node;
+
node = container_of(head, struct amdgpu_mn_node, bos);
- interval_tree_remove(&node->it, &rmn->objects);
+ interval_tree_remove(&node->it, &amn->objects);
kfree(node);
}
- up_write(&rmn->lock);
+ up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 3526efa8960e..904014dc5915 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -38,23 +38,48 @@
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
-static bool amdgpu_need_backup(struct amdgpu_device *adev)
-{
- if (adev->flags & AMD_IS_APU)
- return false;
+/**
+ * DOC: amdgpu_object
+ *
+ * This defines the interfaces to operate on an &amdgpu_bo buffer object which
+ * represents memory used by the driver (VRAM, system memory, etc.). The driver
+ * provides DRM/GEM APIs to userspace. DRM/GEM APIs then use these interfaces
+ * to create/destroy/set buffer objects which are then managed by the kernel TTM
+ * memory manager.
+ * The interfaces are also used internally by kernel clients, including gfx,
+ * uvd, etc., for kernel-managed allocations used by the GPU.
+ *
+ */
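For context on the userspace side mentioned above, creating a buffer object comes down to the AMDGPU GEM create ioctl. A minimal sketch, with the render node path and the size/alignment as placeholder assumptions and error handling trimmed:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>	/* union drm_amdgpu_gem_create, domain flags */

int main(void)
{
	/* typical render node; the index depends on the system */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	union drm_amdgpu_gem_create args;

	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.in.bo_size = 1 << 20;			/* 1 MiB */
	args.in.alignment = 4096;
	args.in.domains = AMDGPU_GEM_DOMAIN_GTT;	/* system memory via GART */

	if (ioctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args) == 0)
		printf("GEM handle %u\n", args.out.handle);
	return 0;
}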
- if (amdgpu_gpu_recovery == 0 ||
- (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))
- return false;
+/**
+ * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
+ *
+ * @bo: &amdgpu_bo buffer object
+ *
+ * This function is called when a BO stops being pinned, and updates the
+ * &amdgpu_device pin_size values accordingly.
+ */
+static void amdgpu_bo_subtract_pin_size(struct amdgpu_bo *bo)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- return true;
+ if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
+ } else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
+ atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
+ }
}
-static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
+static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ if (bo->pin_count > 0)
+ amdgpu_bo_subtract_pin_size(bo);
+
if (bo->kfd_bo)
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
@@ -73,14 +98,32 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
kfree(bo);
}
-bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
+/**
+ * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
+ * @bo: buffer object to be checked
+ *
+ * Uses destroy function associated with the object to determine if this is
+ * an &amdgpu_bo.
+ *
+ * Returns:
+ * true if the object belongs to &amdgpu_bo, false if not.
+ */
+bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
- if (bo->destroy == &amdgpu_ttm_bo_destroy)
+ if (bo->destroy == &amdgpu_bo_destroy)
return true;
return false;
}
-void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
+/**
+ * amdgpu_bo_placement_from_domain - set buffer's placement
+ * @abo: &amdgpu_bo buffer object whose placement is to be set
+ * @domain: requested domain
+ *
+ * Sets buffer's placement according to requested domain and the buffer's
+ * flags.
+ */
+void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct ttm_placement *placement = &abo->placement;
@@ -108,10 +151,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
- if (flags & AMDGPU_GEM_CREATE_SHADOW)
- places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
- else
- places[c].lpfn = 0;
+ places[c].lpfn = 0;
places[c].flags = TTM_PL_FLAG_TT;
if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
places[c].flags |= TTM_PL_FLAG_WC |
@@ -161,6 +201,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
+ BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
+
placement->num_placement = c;
placement->placement = places;
@@ -184,7 +226,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
*
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
- * Returns 0 on success, negative error code otherwise.
+ * Returns:
+ * 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
unsigned long size, int align,
@@ -195,6 +238,11 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
bool free = false;
int r;
+ if (!size) {
+ amdgpu_bo_unref(bo_ptr);
+ return 0;
+ }
+
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = align;
@@ -220,22 +268,33 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
goto error_free;
}
- r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
+ r = amdgpu_bo_pin(*bo_ptr, domain);
if (r) {
dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
goto error_unreserve;
}
+ r = amdgpu_ttm_alloc_gart(&(*bo_ptr)->tbo);
+ if (r) {
+ dev_err(adev->dev, "%p bind failed\n", *bo_ptr);
+ goto error_unpin;
+ }
+
+ if (gpu_addr)
+ *gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
+
if (cpu_addr) {
r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
- goto error_unreserve;
+ goto error_unpin;
}
}
return 0;
+error_unpin:
+ amdgpu_bo_unpin(*bo_ptr);
error_unreserve:
amdgpu_bo_unreserve(*bo_ptr);
@@ -261,7 +320,8 @@ error_free:
*
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
- * Returns 0 on success, negative error code otherwise.
+ * Returns:
+ * 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
@@ -276,7 +336,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
if (r)
return r;
- amdgpu_bo_unreserve(*bo_ptr);
+ if (*bo_ptr)
+ amdgpu_bo_unreserve(*bo_ptr);
return 0;
}
@@ -285,6 +346,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
* amdgpu_bo_free_kernel - free BO for kernel use
*
* @bo: amdgpu BO to free
+ * @gpu_addr: pointer to where the BO's GPU memory space address was stored
+ * @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* Unmaps and unpins a BO for kernel internal use.
*/
@@ -364,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
int r;
page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
- size = ALIGN(size, PAGE_SIZE);
+ if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
+ AMDGPU_GEM_DOMAIN_OA))
+ size <<= PAGE_SHIFT;
+ else
+ size = ALIGN(size, PAGE_SIZE);
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
@@ -379,7 +446,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
return -ENOMEM;
drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list);
- INIT_LIST_HEAD(&bo->va);
+ bo->vm_bo = NULL;
bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
bp->domain;
bo->allowed_domains = bo->preferred_domains;
@@ -418,17 +485,17 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
#endif
bo->tbo.bdev = &adev->mman.bdev;
- amdgpu_ttm_placement_from_domain(bo, bp->domain);
+ amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
bo->tbo.priority = 1;
r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
&bo->placement, page_align, &ctx, acc_size,
- NULL, bp->resv, &amdgpu_ttm_bo_destroy);
+ NULL, bp->resv, &amdgpu_bo_destroy);
if (unlikely(r != 0))
return r;
- if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
@@ -469,7 +536,7 @@ fail_unreserve:
}
static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
- unsigned long size, int byte_align,
+ unsigned long size,
struct amdgpu_bo *bo)
{
struct amdgpu_bo_param bp;
@@ -480,7 +547,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
memset(&bp, 0, sizeof(bp));
bp.size = size;
- bp.byte_align = byte_align;
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW;
@@ -491,13 +557,27 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
- list_add_tail(&bo->shadow_list, &adev->shadow_list);
+ list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
mutex_unlock(&adev->shadow_list_lock);
}
return r;
}
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object and, if requested, also creates a
+ * shadow object.
+ * The shadow object is used to back up the original buffer object, and is
+ * always placed in GTT.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
@@ -510,12 +590,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (r)
return r;
- if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
+ if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL));
- r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
+ r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
if (!bp->resv)
reservation_object_unlock((*bo_ptr)->tbo.resv);
@@ -527,6 +607,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
}
+/**
+ * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @ring: amdgpu_ring for the engine handling the buffer operations
+ * @bo: &amdgpu_bo buffer to be backed up
+ * @resv: reservation object with embedded fence
+ * @fence: dma_fence associated with the operation
+ * @direct: whether to submit the job directly
+ *
+ * Copies an &amdgpu_bo buffer object to its shadow object.
+ * Not used for now.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
@@ -559,6 +654,18 @@ err:
return r;
}
+/**
+ * amdgpu_bo_validate - validate an &amdgpu_bo buffer object
+ * @bo: pointer to the buffer object
+ *
+ * Sets placement according to domain, and changes the placement and caching
+ * policy of the buffer object accordingly.
+ * This is used for validating shadow bos. It calls ttm_bo_validate() to
+ * make sure the buffer is resident where it needs to be.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_validate(struct amdgpu_bo *bo)
{
struct ttm_operation_ctx ctx = { false, false };
@@ -571,7 +678,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
domain = bo->preferred_domains;
retry:
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
domain = bo->allowed_domains;
@@ -581,38 +688,45 @@ retry:
return r;
}
-int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_bo *bo,
- struct reservation_object *resv,
- struct dma_fence **fence,
- bool direct)
+/**
+ * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
+ *
+ * @shadow: &amdgpu_bo shadow to be restored
+ * @fence: dma_fence associated with the operation
+ *
+ * Copies a buffer object's shadow content back to the object.
+ * This is used for recovering a buffer from its shadow in case of a gpu
+ * reset where vram context may be lost.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
{
- struct amdgpu_bo *shadow = bo->shadow;
- uint64_t bo_addr, shadow_addr;
- int r;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ uint64_t shadow_addr, parent_addr;
- if (!shadow)
- return -EINVAL;
+ shadow_addr = amdgpu_bo_gpu_offset(shadow);
+ parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
- bo_addr = amdgpu_bo_gpu_offset(bo);
- shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
-
- r = reservation_object_reserve_shared(bo->tbo.resv);
- if (r)
- goto err;
-
- r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
- amdgpu_bo_size(bo), resv, fence,
- direct, false);
- if (!r)
- amdgpu_bo_fence(bo, *fence, true);
-
-err:
- return r;
+ return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
+ amdgpu_bo_size(shadow), NULL, fence,
+ true, false);
}
+/**
+ * amdgpu_bo_kmap - map an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be mapped
+ * @ptr: kernel virtual address to be returned
+ *
+ * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls
+ * amdgpu_bo_kptr() to get the kernel virtual address.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
void *kptr;
@@ -643,6 +757,15 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
return 0;
}
+/**
+ * amdgpu_bo_kptr - returns a kernel virtual address of the buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Calls ttm_kmap_obj_virtual() to get the kernel virtual address
+ *
+ * Returns:
+ * the virtual address of a buffer object area.
+ */
void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
{
bool is_iomem;
@@ -650,21 +773,42 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
}
+/**
+ * amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unmapped
+ *
+ * Unmaps a kernel map set up by amdgpu_bo_kmap().
+ */
void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
if (bo->kmap.bo)
ttm_bo_kunmap(&bo->kmap);
}
+/**
+ * amdgpu_bo_ref - reference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * References the contained &ttm_buffer_object.
+ *
+ * Returns:
+ * a refcounted pointer to the &amdgpu_bo buffer object.
+ */
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
if (bo == NULL)
return NULL;
- ttm_bo_reference(&bo->tbo);
+ ttm_bo_get(&bo->tbo);
return bo;
}
+/**
+ * amdgpu_bo_unref - unreference an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object
+ *
+ * Unreferences the contained &ttm_buffer_object and clears the pointer.
+ */
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
struct ttm_buffer_object *tbo;
@@ -673,14 +817,34 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
return;
tbo = &((*bo)->tbo);
- ttm_bo_unref(&tbo);
- if (tbo == NULL)
- *bo = NULL;
+ ttm_bo_put(tbo);
+ *bo = NULL;
}
+/**
+ * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be pinned
+ * @domain: domain to be pinned to
+ * @min_offset: the start of requested address range
+ * @max_offset: the end of requested address range
+ *
+ * Pins the buffer object according to the requested domain and address range.
+ * If the memory is unbound GART memory, binds the pages into the GART table.
+ * Adjusts pin_count and pin_size accordingly.
+ *
+ * Pinning means to lock pages in memory along with keeping them at a fixed
+ * offset. It is required when a buffer cannot be moved, for example, when
+ * a display buffer is being scanned out.
+ *
+ * Compared with amdgpu_bo_pin(), this function gives more flexibility on
+ * where to pin a buffer if there are specific restrictions on where a buffer
+ * must be located.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset,
- u64 *gpu_addr)
+ u64 min_offset, u64 max_offset)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@@ -712,8 +876,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
bo->pin_count++;
- if (gpu_addr)
- *gpu_addr = amdgpu_bo_gpu_offset(bo);
if (max_offset != 0) {
u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
@@ -728,7 +890,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
/* force to pin into visible video ram */
if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
unsigned fpfn, lpfn;
@@ -749,33 +911,48 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
goto error;
}
- r = amdgpu_ttm_alloc_gart(&bo->tbo);
- if (unlikely(r)) {
- dev_err(adev->dev, "%p bind failed\n", bo);
- goto error;
- }
-
bo->pin_count = 1;
- if (gpu_addr != NULL)
- *gpu_addr = amdgpu_bo_gpu_offset(bo);
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
- adev->vram_pin_size += amdgpu_bo_size(bo);
- adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo);
+ atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
+ atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
+ &adev->visible_pin_size);
} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
- adev->gart_pin_size += amdgpu_bo_size(bo);
+ atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
error:
return r;
}
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
+/**
+ * amdgpu_bo_pin - pin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be pinned
+ * @domain: domain to be pinned to
+ *
+ * A simple wrapper to amdgpu_bo_pin_restricted().
+ * Provides a simpler API for buffers that do not have any strict restrictions
+ * on where a buffer must be located.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
- return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
+ return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
}
+/**
+ * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
+ * @bo: &amdgpu_bo buffer object to be unpinned
+ *
+ * Decreases the pin_count, and clears the flags if pin_count reaches 0.
+ * Changes placement and pin size accordingly.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -790,12 +967,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
if (bo->pin_count)
return 0;
- if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
- adev->vram_pin_size -= amdgpu_bo_size(bo);
- adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo);
- } else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
- adev->gart_pin_size -= amdgpu_bo_size(bo);
- }
+ amdgpu_bo_subtract_pin_size(bo);
for (i = 0; i < bo->placement.num_placement; i++) {
bo->placements[i].lpfn = 0;
@@ -808,13 +980,25 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
return r;
}
+/**
+ * amdgpu_bo_evict_vram - evict VRAM buffers
+ * @adev: amdgpu device object
+ *
+ * Evicts all VRAM buffers on the lru list of the memory type.
+ * Mainly used for evicting vram at suspend time.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
- if (0 && (adev->flags & AMD_IS_APU)) {
+#ifndef CONFIG_HIBERNATION
+ if (adev->flags & AMD_IS_APU) {
/* Useless to evict on IGP chips */
return 0;
}
+#endif
return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}
@@ -830,6 +1014,15 @@ static const char *amdgpu_vram_names[] = {
"DDR4",
};
+/**
+ * amdgpu_bo_init - initialize memory manager
+ * @adev: amdgpu device object
+ *
+ * Calls amdgpu_ttm_init() to initialize amdgpu memory manager.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* reserve PAT memory space to WC for VRAM */
@@ -847,6 +1040,16 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
return amdgpu_ttm_init(adev);
}
+/**
+ * amdgpu_bo_late_init - late init
+ * @adev: amdgpu device object
+ *
+ * Calls amdgpu_ttm_late_init() to free resources used earlier during
+ * initialization.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_late_init(struct amdgpu_device *adev)
{
amdgpu_ttm_late_init(adev);
@@ -854,6 +1057,12 @@ int amdgpu_bo_late_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_bo_fini - tear down memory manager
+ * @adev: amdgpu device object
+ *
+ * Reverses amdgpu_bo_init() to tear down memory manager.
+ */
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
amdgpu_ttm_fini(adev);
@@ -861,12 +1070,33 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}
+/**
+ * amdgpu_bo_fbdev_mmap - mmap fbdev memory
+ * @bo: &amdgpu_bo buffer object
+ * @vma: vma as input from the fbdev mmap method
+ *
+ * Calls ttm_fbdev_mmap() to mmap fbdev memory if it is backed by a bo.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma)
{
return ttm_fbdev_mmap(vma, &bo->tbo);
}
+/**
+ * amdgpu_bo_set_tiling_flags - set tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: new flags
+ *
+ * Sets the buffer object's tiling flags to the new value. Used by the GEM
+ * ioctl or kernel driver to set the tiling flags on a buffer.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -879,6 +1109,14 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
return 0;
}
+/**
+ * amdgpu_bo_get_tiling_flags - get tiling flags
+ * @bo: &amdgpu_bo buffer object
+ * @tiling_flags: returned flags
+ *
+ * Gets the buffer object's tiling flags. Used by the GEM ioctl or kernel
+ * driver to query the tiling flags on a buffer.
+ */
void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
lockdep_assert_held(&bo->tbo.resv->lock.base);
@@ -887,6 +1125,19 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
*tiling_flags = bo->tiling_flags;
}
+/**
+ * amdgpu_bo_set_metadata - set metadata
+ * @bo: &amdgpu_bo buffer object
+ * @metadata: new metadata
+ * @metadata_size: size of the new metadata
+ * @flags: flags of the new metadata
+ *
+ * Sets buffer object's metadata, its size and flags.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
uint32_t metadata_size, uint64_t flags)
{
@@ -916,6 +1167,21 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
return 0;
}
+/**
+ * amdgpu_bo_get_metadata - get metadata
+ * @bo: &amdgpu_bo buffer object
+ * @buffer: returned metadata
+ * @buffer_size: size of the buffer
+ * @metadata_size: size of the returned metadata
+ * @flags: flags of the returned metadata
+ *
+ * Gets buffer object's metadata, its size and flags. buffer_size shall not be
+ * less than metadata_size.
+ * Used via GEM ioctl.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
size_t buffer_size, uint32_t *metadata_size,
uint64_t *flags)
@@ -939,6 +1205,16 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
return 0;
}
+/**
+ * amdgpu_bo_move_notify - notification about a memory move
+ * @bo: pointer to a buffer object
+ * @evict: if this move is evicting the buffer from the graphics address space
+ * @new_mem: new information of the buffer object
+ *
+ * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
+ * bookkeeping.
+ * TTM driver callback which is called when ttm moves a buffer.
+ */
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_mem_reg *new_mem)
@@ -947,7 +1223,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
struct amdgpu_bo *abo;
struct ttm_mem_reg *old_mem = &bo->mem;
- if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
abo = ttm_to_amdgpu_bo(bo);
@@ -964,9 +1240,20 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
return;
/* move_notify is called before move happens */
- trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
+ trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}
+/**
+ * amdgpu_bo_fault_reserve_notify - notification about a memory fault
+ * @bo: pointer to a buffer object
+ *
+ * Notifies the driver we are taking a fault on this BO and have reserved it,
+ * also performs bookkeeping.
+ * TTM driver callback for dealing with vm faults.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -975,7 +1262,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
unsigned long offset, size;
int r;
- if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
return 0;
abo = ttm_to_amdgpu_bo(bo);
@@ -997,8 +1284,8 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* hurrah the memory is not visible ! */
atomic64_inc(&adev->num_vram_cpu_page_faults);
- amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT);
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT);
/* Avoid costly evictions; only set GTT as a busy placement */
abo->placement.num_busy_placement = 1;
@@ -1040,25 +1327,32 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
* amdgpu_bo_gpu_offset - return GPU offset of bo
* @bo: amdgpu object for which we query the offset
*
- * Returns current GPU offset of the object.
- *
* Note: object should either be pinned or reserved when calling this
* function, it might be useful to add check for this for debugging.
+ *
+ * Returns:
+ * current GPU offset of the object.
*/
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
- WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem));
WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
- !bo->pin_count);
+ !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel);
WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
!(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
- return bo->tbo.offset;
+ return amdgpu_gmc_sign_extend(bo->tbo.offset);
}
+/**
+ * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout
+ * @adev: amdgpu device object
+ * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
+ *
+ * Returns:
+ * Which of the allowed domains is preferred for pinning the BO for scanout.
+ */
uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
uint32_t domain)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 731748033878..7d3312d0da11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -32,6 +32,7 @@
#include "amdgpu.h"
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
+#define AMDGPU_BO_MAX_PLACEMENTS 3
struct amdgpu_bo_param {
unsigned long size;
@@ -77,7 +78,7 @@ struct amdgpu_bo {
/* Protected by tbo.reserved */
u32 preferred_domains;
u32 allowed_domains;
- struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+ struct ttm_place placements[AMDGPU_BO_MAX_PLACEMENTS];
struct ttm_placement placement;
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
@@ -88,8 +89,8 @@ struct amdgpu_bo {
void *metadata;
u32 metadata_size;
unsigned prime_shared_count;
- /* list of all virtual address to which this bo is associated to */
- struct list_head va;
+ /* per VM structure for page tables and with virtual addresses */
+ struct amdgpu_vm_bo_base *vm_bo;
/* Constant after initialization */
struct drm_gem_object gem_base;
struct amdgpu_bo *parent;
@@ -193,19 +194,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
}
/**
- * amdgpu_bo_gpu_accessible - return whether the bo is currently in memory that
- * is accessible to the GPU.
- */
-static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo)
-{
- switch (bo->tbo.mem.mem_type) {
- case TTM_PL_TT: return amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem);
- case TTM_PL_VRAM: return true;
- default: return false;
- }
-}
-
-/**
* amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
*/
static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
@@ -234,6 +222,9 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
+bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
+
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr);
@@ -252,10 +243,9 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
-int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr);
+int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
- u64 min_offset, u64 max_offset,
- u64 *gpu_addr);
+ u64 min_offset, u64 max_offset);
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
@@ -283,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct reservation_object *resv,
struct dma_fence **fence, bool direct);
int amdgpu_bo_validate(struct amdgpu_bo *bo);
-int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_bo *bo,
- struct reservation_object *resv,
- struct dma_fence **fence,
- bool direct);
+int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
+ struct dma_fence **fence);
uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
uint32_t domain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index fc818b4d849c..94055a485e01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -27,11 +27,12 @@
#include "amdgpu_drv.h"
#include "amdgpu_pm.h"
#include "amdgpu_dpm.h"
+#include "amdgpu_display.h"
#include "atom.h"
#include <linux/power_supply.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
-
+#include <linux/nospec.h>
static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev);
@@ -68,11 +69,11 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
if (adev->pm.dpm_enabled) {
mutex_lock(&adev->pm.mutex);
if (power_supply_is_system_supplied() > 0)
- adev->pm.dpm.ac_power = true;
+ adev->pm.ac_power = true;
else
- adev->pm.dpm.ac_power = false;
+ adev->pm.ac_power = false;
if (adev->powerplay.pp_funcs->enable_bapm)
- amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power);
+ amdgpu_dpm_enable_bapm(adev, adev->pm.ac_power);
mutex_unlock(&adev->pm.mutex);
}
}
@@ -80,12 +81,15 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
/**
* DOC: power_dpm_state
*
- * This is a legacy interface and is only provided for backwards compatibility.
- * The amdgpu driver provides a sysfs API for adjusting certain power
- * related parameters. The file power_dpm_state is used for this.
+ * The power_dpm_state file is a legacy interface and is only provided for
+ * backwards compatibility. The amdgpu driver provides a sysfs API for adjusting
+ * certain power related parameters. The file power_dpm_state is used for this.
* It accepts the following arguments:
+ *
* - battery
+ *
* - balanced
+ *
* - performance
*
* battery
@@ -169,14 +173,21 @@ fail:
* The amdgpu driver provides a sysfs API for adjusting certain power
* related parameters. The file power_dpm_force_performance_level is
* used for this. It accepts the following arguments:
+ *
* - auto
+ *
* - low
+ *
* - high
+ *
* - manual
- * - GPU fan
+ *
* - profile_standard
+ *
* - profile_min_sclk
+ *
* - profile_min_mclk
+ *
* - profile_peak
*
* auto
@@ -393,6 +404,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
count = -EINVAL;
goto fail;
}
+ idx = array_index_nospec(idx, ARRAY_SIZE(data.states));
amdgpu_dpm_get_pp_num_states(adev, &data);
state = data.states[idx];
@@ -462,9 +474,14 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* in each power level within a power state. The pp_od_clk_voltage is used for
* this.
*
+ * < For Vega10 and previous ASICs >
+ *
* Reading the file will display:
+ *
* - a list of engine clock levels and voltages labeled OD_SCLK
+ *
* - a list of memory clock levels and voltages labeled OD_MCLK
+ *
* - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
*
* To manually adjust these settings, first select manual using
@@ -476,6 +493,44 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* "c" (commit) to the file to commit your changes. If you want to reset to the
* default power levels, write "r" (reset) to the file to reset them.
*
+ *
+ * < For Vega20 >
+ *
+ * Reading the file will display:
+ *
+ * - minimum and maximum engine clock labeled OD_SCLK
+ *
+ * - maximum memory clock labeled OD_MCLK
+ *
+ * - three <frequency, voltage> points labeled OD_VDDC_CURVE.
+ * They can be used to calibrate the sclk voltage curve.
+ *
+ * - a list of valid ranges for sclk, mclk, and voltage curve points
+ * labeled OD_RANGE
+ *
+ * To manually adjust these settings:
+ *
+ * - First select manual using power_dpm_force_performance_level
+ *
+ * - For clock frequency setting, enter a new value by writing a
+ * string that contains "s/m index clock" to the file. The index
+ * should be 0 to set the minimum clock and 1 to set the maximum
+ * clock. E.g., "s 0 500" will update the minimum sclk to 500 MHz, and
+ * "m 1 800" will update the maximum mclk to 800 MHz.
+ *
+ * For the sclk voltage curve, enter the new values by writing a
+ * string that contains "vc point clock voltage" to the file. The
+ * points are indexed by 0, 1 and 2. E.g., "vc 0 300 600" will
+ * update point 1 with the clock set to 300 MHz and the voltage to
+ * 600 mV. "vc 2 1000 1000" will update point 3 with the clock set
+ * to 1000 MHz and the voltage to 1000 mV.
+ *
+ * - When you have edited all of the states as needed, write "c" (commit)
+ * to the file to commit your changes
+ *
+ * - If you want to reset to the default power levels, write "r" (reset)
+ * to the file to reset them
+ *
*/
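A small userspace sketch of the Vega20 flow described above; the card0 sysfs path and the clock value are assumptions for illustration:

#include <stdio.h>

/* write a string to a sysfs file, e.g. the pp_od_clk_voltage interface above */
static int sysfs_write(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	const char *dev = "/sys/class/drm/card0/device";
	char path[256];

	/* 1) switch to manual performance level */
	snprintf(path, sizeof(path), "%s/power_dpm_force_performance_level", dev);
	sysfs_write(path, "manual");

	/* 2) set the minimum sclk to 500 MHz, then commit */
	snprintf(path, sizeof(path), "%s/pp_od_clk_voltage", dev);
	sysfs_write(path, "s 0 500");
	sysfs_write(path, "c");
	return 0;
}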
static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
@@ -505,6 +560,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
type = PP_OD_RESTORE_DEFAULT_TABLE;
else if (*buf == 'c')
type = PP_OD_COMMIT_DPM_TABLE;
+ else if (!strncmp(buf, "vc", 2))
+ type = PP_OD_EDIT_VDDC_CURVE;
else
return -EINVAL;
@@ -512,6 +569,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
tmp_str = buf_cpy;
+ if (type == PP_OD_EDIT_VDDC_CURVE)
+ tmp_str++;
while (isspace(*++tmp_str));
while (tmp_str[0]) {
@@ -555,6 +614,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
if (adev->powerplay.pp_funcs->print_clock_levels) {
size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size);
size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
return size;
} else {
@@ -593,40 +653,59 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
return snprintf(buf, PAGE_SIZE, "\n");
}
-static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
+/*
+ * Worst case: 32 bits individually specified, in octal at 12 characters
+ * per line (+1 for \n).
+ */
+#define AMDGPU_MASK_BUF_MAX (32 * 13)
+
+static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask)
{
- struct drm_device *ddev = dev_get_drvdata(dev);
- struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
- uint32_t mask = 0;
char *sub_str = NULL;
char *tmp;
- char buf_cpy[count];
+ char buf_cpy[AMDGPU_MASK_BUF_MAX + 1];
const char delimiter[3] = {' ', '\n', '\0'};
+ size_t bytes;
- memcpy(buf_cpy, buf, count+1);
+ *mask = 0;
+
+ bytes = min(count, sizeof(buf_cpy) - 1);
+ memcpy(buf_cpy, buf, bytes);
+ buf_cpy[bytes] = '\0';
tmp = buf_cpy;
while (tmp[0]) {
- sub_str = strsep(&tmp, delimiter);
+ sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
-
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
+ if (ret)
+ return -EINVAL;
+ *mask |= 1 << level;
} else
break;
}
+
+ return 0;
+}
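The mask format accepted here — a space-separated list of level indices, each turned into a bit — can be reproduced with a standalone userspace sketch of the same parsing:

#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* "4 5 6" -> 0x70, i.e. one bit per requested level, as in amdgpu_read_mask() */
static uint32_t parse_level_mask(const char *buf)
{
	char copy[128];
	char *tmp = copy, *sub;
	uint32_t mask = 0;

	strncpy(copy, buf, sizeof(copy) - 1);
	copy[sizeof(copy) - 1] = '\0';

	while ((sub = strsep(&tmp, " \n")) != NULL) {
		if (*sub)
			mask |= 1u << strtol(sub, NULL, 0);
	}
	return mask;
}

int main(void)
{
	printf("0x%x\n", parse_level_mask("4 5 6"));	/* prints 0x70 */
	return 0;
}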
+
+static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int ret;
+ uint32_t mask = 0;
+
+ ret = amdgpu_read_mask(buf, count, &mask);
+ if (ret)
+ return ret;
+
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
-fail:
return count;
}
@@ -651,32 +730,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
- long level;
uint32_t mask = 0;
- char *sub_str = NULL;
- char *tmp;
- char buf_cpy[count];
- const char delimiter[3] = {' ', '\n', '\0'};
- memcpy(buf_cpy, buf, count+1);
- tmp = buf_cpy;
- while (tmp[0]) {
- sub_str = strsep(&tmp, delimiter);
- if (strlen(sub_str)) {
- ret = kstrtol(sub_str, 0, &level);
+ ret = amdgpu_read_mask(buf, count, &mask);
+ if (ret)
+ return ret;
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
- } else
- break;
- }
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
-fail:
return count;
}
@@ -701,33 +763,15 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
- long level;
uint32_t mask = 0;
- char *sub_str = NULL;
- char *tmp;
- char buf_cpy[count];
- const char delimiter[3] = {' ', '\n', '\0'};
- memcpy(buf_cpy, buf, count+1);
- tmp = buf_cpy;
-
- while (tmp[0]) {
- sub_str = strsep(&tmp, delimiter);
- if (strlen(sub_str)) {
- ret = kstrtol(sub_str, 0, &level);
+ ret = amdgpu_read_mask(buf, count, &mask);
+ if (ret)
+ return ret;
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- mask |= 1 << level;
- } else
- break;
- }
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
-fail:
return count;
}
@@ -905,6 +949,36 @@ fail:
return -EINVAL;
}
+/**
+ * DOC: busy_percent
+ *
+ * The amdgpu driver provides a sysfs API for reading how busy the GPU
+ * is as a percentage. The file gpu_busy_percent is used for this.
+ * The SMU firmware computes a percentage of load based on the
+ * aggregate activity level in the IP cores.
+ */
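Reading the file from userspace is straightforward; a small sketch, with the card0 sysfs path as an assumption:

#include <stdio.h>

int main(void)
{
	/* exposed next to the other pm files; the card index may differ */
	FILE *f = fopen("/sys/class/drm/card0/device/gpu_busy_percent", "r");
	int busy;

	if (f && fscanf(f, "%d", &busy) == 1)
		printf("GPU load: %d%%\n", busy);
	if (f)
		fclose(f);
	return 0;
}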
+static ssize_t amdgpu_get_busy_percent(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int r, value, size = sizeof(value);
+
+ /* sanity check PP is enabled */
+ if (!(adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->read_sensor))
+ return -EINVAL;
+
+ /* read the IP busy sensor */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
+ (void *)&value, &size);
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@@ -938,6 +1012,8 @@ static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_get_pp_od_clk_voltage,
amdgpu_set_pp_od_clk_voltage);
+static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
+ amdgpu_get_busy_percent, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -1044,12 +1120,19 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
struct amdgpu_device *adev = dev_get_drvdata(dev);
int err;
u32 value;
+ u32 pwm_mode;
/* Can't adjust fan when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
+ pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+ if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
+ pr_info("manual fan speed control should be enabled first\n");
+ return -EINVAL;
+ }
+
err = kstrtou32(buf, 10, &value);
if (err)
return err;
@@ -1111,6 +1194,148 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
return sprintf(buf, "%i\n", speed);
}
+static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ u32 min_rpm = 0;
+ u32 size = sizeof(min_rpm);
+ int r;
+
+ if (!adev->powerplay.pp_funcs->read_sensor)
+ return -EINVAL;
+
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
+ (void *)&min_rpm, &size);
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", min_rpm);
+}
+
+static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ u32 max_rpm = 0;
+ u32 size = sizeof(max_rpm);
+ int r;
+
+ if (!adev->powerplay.pp_funcs->read_sensor)
+ return -EINVAL;
+
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
+ (void *)&max_rpm, &size);
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", max_rpm);
+}
+
+static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int err;
+ u32 rpm = 0;
+
+ /* Can't adjust fan when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
+ if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
+ err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm);
+ if (err)
+ return err;
+ }
+
+ return sprintf(buf, "%i\n", rpm);
+}
+
+static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int err;
+ u32 value;
+ u32 pwm_mode;
+
+ pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+ if (pwm_mode != AMD_FAN_CTRL_MANUAL)
+ return -ENODATA;
+
+ /* Can't adjust fan when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
+ err = kstrtou32(buf, 10, &value);
+ if (err)
+ return err;
+
+ if (adev->powerplay.pp_funcs->set_fan_speed_rpm) {
+ err = amdgpu_dpm_set_fan_speed_rpm(adev, value);
+ if (err)
+ return err;
+ }
+
+ return count;
+}
+
+static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ u32 pwm_mode = 0;
+
+ if (!adev->powerplay.pp_funcs->get_fan_control_mode)
+ return -EINVAL;
+
+ pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
+
+ return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1);
+}
+
+static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int err;
+ int value;
+ u32 pwm_mode;
+
+ /* Can't adjust fan when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
+ if (!adev->powerplay.pp_funcs->set_fan_control_mode)
+ return -EINVAL;
+
+ err = kstrtoint(buf, 10, &value);
+ if (err)
+ return err;
+
+ if (value == 0)
+ pwm_mode = AMD_FAN_CTRL_AUTO;
+ else if (value == 1)
+ pwm_mode = AMD_FAN_CTRL_MANUAL;
+ else
+ return -EINVAL;
+
+ amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
+
+ return count;
+}
+
static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1156,7 +1381,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
int r, size = sizeof(vddnb);
/* only APUs have vddnb */
- if (adev->flags & AMD_IS_APU)
+ if (!(adev->flags & AMD_IS_APU))
return -EINVAL;
/* Can't get voltage when the card is off */
@@ -1285,37 +1510,61 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
* DOC: hwmon
*
* The amdgpu driver exposes the following sensor interfaces:
+ *
* - GPU temperature (via the on-die sensor)
+ *
* - GPU voltage
+ *
* - Northbridge voltage (APUs only)
+ *
* - GPU power
+ *
* - GPU fan
*
* hwmon interfaces for GPU temperature:
+ *
* - temp1_input: the on die GPU temperature in millidegrees Celsius
+ *
* - temp1_crit: temperature critical max value in millidegrees Celsius
+ *
* - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
*
* hwmon interfaces for GPU voltage:
+ *
* - in0_input: the voltage on the GPU in millivolts
+ *
* - in1_input: the voltage on the Northbridge in millivolts
*
* hwmon interfaces for GPU power:
+ *
* - power1_average: average power used by the GPU in microWatts
+ *
* - power1_cap_min: minimum cap supported in microWatts
+ *
* - power1_cap_max: maximum cap supported in microWatts
+ *
* - power1_cap: selected power cap in microWatts
*
* hwmon interfaces for GPU fan:
+ *
* - pwm1: pulse width modulation fan level (0-255)
- * - pwm1_enable: pulse width modulation fan control method
- * 0: no fan speed control
- * 1: manual fan speed control using pwm interface
- * 2: automatic fan speed control
+ *
+ * - pwm1_enable: pulse width modulation fan control method (0: no fan speed control, 1: manual fan speed control using pwm interface, 2: automatic fan speed control)
+ *
* - pwm1_min: pulse width modulation fan control minimum level (0)
+ *
* - pwm1_max: pulse width modulation fan control maximum level (255)
+ *
+ * - fan1_min: minimum fan speed. Unit: revolution/min (RPM)
+ *
+ * - fan1_max: maximum fan speed. Unit: revolution/min (RPM)
+ *
* - fan1_input: fan speed in RPM
*
+ * - fan[1-*]_target: desired fan speed. Unit: revolution/min (RPM)
+ *
+ * - fan[1-*]_enable: enable or disable the sensors. 1: enable, 0: disable
+ *
* You can use hwmon tools like sensors to view this information on your system.
*
*/
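As a quick illustration of the interfaces documented above, the following is a minimal sketch that reads the new fan attributes from userspace through sysfs. It assumes the amdgpu hwmon instance is hwmon0; on real systems the index varies and should be located by matching the hwmon "name" attribute against "amdgpu". Error handling is abbreviated.

#include <stdio.h>

static long read_attr(const char *path)
{
	FILE *f = fopen(path, "r");
	long v = -1;

	if (f) {
		if (fscanf(f, "%ld", &v) != 1)
			v = -1;
		fclose(f);
	}
	return v;
}

int main(void)
{
	const char *base = "/sys/class/hwmon/hwmon0";	/* adjust per system */
	char path[256];

	snprintf(path, sizeof(path), "%s/fan1_input", base);
	printf("fan speed:  %ld RPM\n", read_attr(path));
	snprintf(path, sizeof(path), "%s/fan1_target", base);
	printf("fan target: %ld RPM\n", read_attr(path));
	snprintf(path, sizeof(path), "%s/pwm1_enable", base);
	printf("pwm mode:   %ld (0: none, 1: manual, 2: auto)\n", read_attr(path));
	return 0;
}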
@@ -1328,6 +1577,10 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_min, S_IRUGO, amdgpu_hwmon_get_fan1_min, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_max, S_IRUGO, amdgpu_hwmon_get_fan1_max, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan1_target, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_target, amdgpu_hwmon_set_fan1_target, 0);
+static SENSOR_DEVICE_ATTR(fan1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_enable, amdgpu_hwmon_set_fan1_enable, 0);
static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0);
static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0);
static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0);
@@ -1346,6 +1599,10 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_pwm1_min.dev_attr.attr,
&sensor_dev_attr_pwm1_max.dev_attr.attr,
&sensor_dev_attr_fan1_input.dev_attr.attr,
+ &sensor_dev_attr_fan1_min.dev_attr.attr,
+ &sensor_dev_attr_fan1_max.dev_attr.attr,
+ &sensor_dev_attr_fan1_target.dev_attr.attr,
+ &sensor_dev_attr_fan1_enable.dev_attr.attr,
&sensor_dev_attr_in0_input.dev_attr.attr,
&sensor_dev_attr_in0_label.dev_attr.attr,
&sensor_dev_attr_in1_input.dev_attr.attr,
@@ -1364,13 +1621,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
-
/* Skip fan attributes if fan is not present */
if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
- attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
+ attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
return 0;
/* Skip limit attributes if DPM is not enabled */
@@ -1380,7 +1640,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
+ attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_target.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_enable.dev_attr.attr))
return 0;
/* mask fan attributes if we have no bindings for this asic to expose */
@@ -1405,10 +1670,18 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
/* hide max/min values if we can't both query and manage the fan */
if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
!adev->powerplay.pp_funcs->get_fan_speed_percent) &&
+ (!adev->powerplay.pp_funcs->set_fan_speed_rpm &&
+ !adev->powerplay.pp_funcs->get_fan_speed_rpm) &&
(attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
return 0;
+ if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm &&
+ !adev->powerplay.pp_funcs->get_fan_speed_rpm) &&
+ (attr == &sensor_dev_attr_fan1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_fan1_min.dev_attr.attr))
+ return 0;
+
/* only APUs have vddnb */
if (!(adev->flags & AMD_IS_APU) &&
(attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
@@ -1668,56 +1941,21 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
- if (adev->powerplay.pp_funcs->powergate_uvd) {
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
/* enable/disable UVD */
mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_powergate_uvd(adev, !enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
mutex_unlock(&adev->pm.mutex);
- } else {
- if (enable) {
- mutex_lock(&adev->pm.mutex);
- adev->pm.dpm.uvd_active = true;
- adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD;
- mutex_unlock(&adev->pm.mutex);
- } else {
- mutex_lock(&adev->pm.mutex);
- adev->pm.dpm.uvd_active = false;
- mutex_unlock(&adev->pm.mutex);
- }
- amdgpu_pm_compute_clocks(adev);
}
}
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
{
- if (adev->powerplay.pp_funcs->powergate_vce) {
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
/* enable/disable VCE */
mutex_lock(&adev->pm.mutex);
- amdgpu_dpm_powergate_vce(adev, !enable);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
mutex_unlock(&adev->pm.mutex);
- } else {
- if (enable) {
- mutex_lock(&adev->pm.mutex);
- adev->pm.dpm.vce_active = true;
- /* XXX select vce level based on ring/task */
- adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
- mutex_unlock(&adev->pm.mutex);
- amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
- AMD_CG_STATE_UNGATE);
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
- AMD_PG_STATE_UNGATE);
- amdgpu_pm_compute_clocks(adev);
- } else {
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
- AMD_PG_STATE_GATE);
- amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
- AMD_CG_STATE_GATE);
- mutex_lock(&adev->pm.mutex);
- adev->pm.dpm.vce_active = false;
- mutex_unlock(&adev->pm.mutex);
- amdgpu_pm_compute_clocks(adev);
- }
-
}
}
@@ -1825,6 +2063,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"pp_od_clk_voltage\n");
return ret;
}
+ ret = device_create_file(adev->dev,
+ &dev_attr_gpu_busy_percent);
+ if (ret) {
+ DRM_ERROR("failed to create device file "
+ "gpu_busy_level\n");
+ return ret;
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1860,6 +2105,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
&dev_attr_pp_power_profile_mode);
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
+ device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -1898,14 +2144,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
} else {
mutex_lock(&adev->pm.mutex);
amdgpu_dpm_get_active_displays(adev);
- /* update battery/ac status */
- if (power_supply_is_system_supplied() > 0)
- adev->pm.dpm.ac_power = true;
- else
- adev->pm.dpm.ac_power = false;
-
amdgpu_dpm_change_power_state_locked(adev);
-
mutex_unlock(&adev->pm.mutex);
}
}
@@ -1918,6 +2157,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
{
uint32_t value;
+ uint64_t value64;
uint32_t query = 0;
int size;
@@ -1956,6 +2196,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
seq_printf(m, "GPU Load: %u %%\n", value);
seq_printf(m, "\n");
+ /* SMC feature mask */
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size))
+ seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64);
+
/* UVD clocks */
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) {
if (!value) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 4683626b065f..e45e929aaab5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -23,15 +23,32 @@
*
* Authors: Alex Deucher
*/
+
+/**
+ * DOC: PRIME Buffer Sharing
+ *
+ * The following callback implementations are used for :ref:`sharing GEM buffer
+ * objects between different devices via PRIME <prime_buffer_sharing>`.
+ */
+
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_display.h"
+#include "amdgpu_gem.h"
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
static const struct dma_buf_ops amdgpu_dmabuf_ops;
+/**
+ * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
+ * implementation
+ * @obj: GEM buffer object (BO)
+ *
+ * Returns:
+ * A scatter/gather table for the pinned pages of the BO's memory.
+ */
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -40,6 +57,15 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
}
+/**
+ * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
+ * @obj: GEM BO
+ *
+ * Sets up an in-kernel virtual mapping of the BO's memory.
+ *
+ * Returns:
+ * The virtual address of the mapping or an error pointer.
+ */
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -53,6 +79,13 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
return bo->dma_buf_vmap.virtual;
}
+/**
+ * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
+ * @obj: GEM BO
+ * @vaddr: Virtual address (unused)
+ *
+ * Tears down the in-kernel virtual mapping of the BO's memory.
+ */
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -60,6 +93,17 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
ttm_bo_kunmap(&bo->dma_buf_vmap);
}
+/**
+ * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
+ * @obj: GEM BO
+ * @vma: Virtual memory area
+ *
+ * Sets up a userspace mapping of the BO's memory in the given
+ * virtual memory area.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -94,6 +138,19 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
return ret;
}
+/**
+ * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
+ * implementation
+ * @dev: DRM device
+ * @attach: DMA-buf attachment
+ * @sg: Scatter/gather table
+ *
+ * Imports shared DMA buffer memory exported by another device.
+ *
+ * Returns:
+ * A new GEM BO of the given DRM device, representing the memory
+ * described by the given DMA-buf attachment and scatter/gather table.
+ */
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
@@ -132,8 +189,19 @@ error:
return ERR_PTR(ret);
}
+/**
+ * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
+ * @dma_buf: Shared DMA buffer
+ * @attach: DMA-buf attachment
+ *
+ * Makes sure that the shared DMA buffer can be accessed by the target device.
+ * For now, simply pins it to the GTT domain, where it should be accessible by
+ * all DMA devices.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
- struct device *target_dev,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
@@ -141,7 +209,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
- r = drm_gem_map_attach(dma_buf, target_dev, attach);
+ r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
@@ -165,7 +233,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
}
/* pin buffer into GTT */
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error_unreserve;
@@ -181,6 +249,14 @@ error_detach:
return r;
}
+/**
+ * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
+ * @dma_buf: Shared DMA buffer
+ * @attach: DMA-buf attachment
+ *
+ * This is called when a shared DMA buffer no longer needs to be accessible by
+ * another device. For now, simply unpins the buffer from GTT.
+ */
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
@@ -202,6 +278,13 @@ error:
drm_gem_map_detach(dma_buf, attach);
}
+/**
+ * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
+ * @obj: GEM BO
+ *
+ * Returns:
+ * The BO's reservation object.
+ */
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -209,6 +292,18 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
return bo->tbo.resv;
}
+/**
+ * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
+ * @dma_buf: Shared DMA buffer
+ * @direction: Direction of DMA transfer
+ *
+ * This is called before CPU access to the shared DMA buffer's memory. If it's
+ * a read access, the buffer is moved to the GTT domain if possible, for optimal
+ * CPU read performance.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
{
@@ -229,7 +324,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
- amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
@@ -245,14 +340,24 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.map = drm_gem_dmabuf_kmap,
- .map_atomic = drm_gem_dmabuf_kmap_atomic,
.unmap = drm_gem_dmabuf_kunmap,
- .unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
+/**
+ * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
+ * @dev: DRM device
+ * @gobj: GEM BO
+ * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
+ *
+ * The main work is done by the &drm_gem_prime_export helper, which in turn
+ * uses &amdgpu_gem_prime_res_obj.
+ *
+ * Returns:
+ * Shared DMA buffer representing the GEM BO from the given device.
+ */
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
@@ -273,6 +378,17 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
return buf;
}
+/**
+ * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
+ * @dev: DRM device
+ * @dma_buf: Shared DMA buffer
+ *
+ * The main work is done by the &drm_gem_prime_import helper, which in turn
+ * uses &amdgpu_gem_prime_import_sg_table.
+ *
+ * Returns:
+ * GEM BO representing the shared DMA buffer for the given device.
+ */
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
{
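To show how the export/import callbacks above are exercised from userspace, here is a hedged sketch using libdrm's PRIME helpers. It assumes two already-open DRM file descriptors and an existing GEM handle on the exporting device; drmPrimeHandleToFD() ends up in amdgpu_gem_prime_export() and drmPrimeFDToHandle() in amdgpu_gem_prime_import() / amdgpu_gem_prime_import_sg_table().

#include <stdint.h>
#include <stdio.h>
#include <xf86drm.h>	/* libdrm, link with -ldrm */

/* Share a GEM buffer object between two devices via PRIME. */
int share_bo(int export_fd, int import_fd, uint32_t handle)
{
	int prime_fd = -1;
	uint32_t imported_handle = 0;

	/* GEM handle -> dma-buf fd (amdgpu_gem_prime_export path) */
	if (drmPrimeHandleToFD(export_fd, handle, DRM_CLOEXEC, &prime_fd))
		return -1;

	/* dma-buf fd -> GEM handle on the importing device
	 * (amdgpu_gem_prime_import path) */
	if (drmPrimeFDToHandle(import_fd, prime_fd, &imported_handle))
		return -1;

	printf("dma-buf fd %d imported as handle %u\n", prime_fd, imported_handle);
	return 0;
}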
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 9f1a5bd39ae8..25d2f3e757f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -31,6 +31,7 @@
#include "soc15_common.h"
#include "psp_v3_1.h"
#include "psp_v10_0.h"
+#include "psp_v11_0.h"
static void psp_set_funcs(struct amdgpu_device *adev);
@@ -52,12 +53,14 @@ static int psp_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
- case CHIP_VEGA20:
psp_v3_1_set_psp_funcs(psp);
break;
case CHIP_RAVEN:
psp_v10_0_set_psp_funcs(psp);
break;
+ case CHIP_VEGA20:
+ psp_v11_0_set_psp_funcs(psp);
+ break;
default:
return -EINVAL;
}
@@ -131,6 +134,18 @@ psp_cmd_submit_buf(struct psp_context *psp,
msleep(1);
}
+ /* the status field must be 0 after FW is loaded */
+ if (ucode && psp->cmd_buf_mem->resp.status) {
+ DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
+ psp->cmd_buf_mem->resp.status, ucode->ucode_id);
+ return -EINVAL;
+ }
+
+ if (ucode) {
+ ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
+ ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
+ }
+
return ret;
}
@@ -155,7 +170,7 @@ static int psp_tmr_init(struct psp_context *psp)
* Note: this memory need be reserved till the driver
* uninitializes.
*/
- ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000,
+ ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x100000,
AMDGPU_GEM_DOMAIN_VRAM,
&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@@ -171,7 +186,9 @@ static int psp_tmr_load(struct psp_context *psp)
if (!cmd)
return -ENOMEM;
- psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000);
+ psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
+ DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
+ PSP_TMR_SIZE, psp->tmr_mc_addr);
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr, 1);
@@ -435,8 +452,6 @@ static int psp_hw_fini(void *handle)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
- amdgpu_ucode_fini_bo(adev);
-
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf);
@@ -589,3 +604,12 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block =
.rev = 0,
.funcs = &psp_ip_funcs,
};
+
+const struct amdgpu_ip_block_version psp_v11_0_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_PSP,
+ .major = 11,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &psp_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 129209686848..8b8720e9c3f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -32,8 +32,10 @@
#define PSP_CMD_BUFFER_SIZE 0x1000
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
+#define PSP_TMR_SIZE 0x400000
struct psp_context;
+struct psp_xgmi_topology_info;
enum psp_ring_type
{
@@ -63,18 +65,27 @@ struct psp_funcs
int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
- int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type);
+ int (*ring_create)(struct psp_context *psp,
+ enum psp_ring_type ring_type);
int (*ring_stop)(struct psp_context *psp,
enum psp_ring_type ring_type);
int (*ring_destroy)(struct psp_context *psp,
enum psp_ring_type ring_type);
- int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index);
+ int (*cmd_submit)(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
+ int index);
bool (*compare_sram_data)(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
+ uint64_t (*xgmi_get_device_id)(struct psp_context *psp);
+ uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
+ int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
+ struct psp_xgmi_topology_info *topology);
+ int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
+ struct psp_xgmi_topology_info *topology);
};
struct psp_context
@@ -83,11 +94,11 @@ struct psp_context
struct psp_ring km_ring;
struct psp_gfx_cmd_resp *cmd;
- const struct psp_funcs *funcs;
+ const struct psp_funcs *funcs;
/* fence buffer */
- struct amdgpu_bo *fw_pri_bo;
- uint64_t fw_pri_mc_addr;
+ struct amdgpu_bo *fw_pri_bo;
+ uint64_t fw_pri_mc_addr;
void *fw_pri_buf;
/* sos firmware */
@@ -100,8 +111,8 @@ struct psp_context
uint8_t *sos_start_addr;
/* tmr buffer */
- struct amdgpu_bo *tmr_bo;
- uint64_t tmr_mc_addr;
+ struct amdgpu_bo *tmr_bo;
+ uint64_t tmr_mc_addr;
void *tmr_buf;
/* asd firmware and buffer */
@@ -110,13 +121,13 @@ struct psp_context
uint32_t asd_feature_version;
uint32_t asd_ucode_size;
uint8_t *asd_start_addr;
- struct amdgpu_bo *asd_shared_bo;
- uint64_t asd_shared_mc_addr;
+ struct amdgpu_bo *asd_shared_bo;
+ uint64_t asd_shared_mc_addr;
void *asd_shared_buf;
/* fence buffer */
- struct amdgpu_bo *fence_buf_bo;
- uint64_t fence_buf_mc_addr;
+ struct amdgpu_bo *fence_buf_bo;
+ uint64_t fence_buf_mc_addr;
void *fence_buf;
/* cmd buffer */
@@ -130,6 +141,23 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
+struct psp_xgmi_topology_info {
+ /* Generated by PSP to identify the GPU instance within xgmi connection */
+ uint64_t device_id;
+ /*
+ * If all bits are set to 0, the driver indicates it wants to retrieve the
+ * xgmi connection vector topology, but not access-enable the connections.
+ * If some or all bits are set to 1, the driver indicates it wants to
+ * retrieve the current xgmi topology and access-enable the link to GPU[i]
+ * associated with the bit position in the vector.
+ * On return, bits indicate which xgmi links are present/active depending
+ * on the value passed in. The relative bit offset for the relative GPU
+ * index within the hive is always marked active.
+ */
+ uint32_t connection_mask;
+ uint32_t reserved; /* must be 0 */
+};
+
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@@ -149,6 +177,18 @@ struct amdgpu_psp_funcs {
((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \
((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
+#define psp_xgmi_get_device_id(psp) \
+ ((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0)
+#define psp_xgmi_get_hive_id(psp) \
+ ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
+#define psp_xgmi_get_topology_info(psp, num_device, topology) \
+ ((psp)->funcs->xgmi_get_topology_info ? \
+ (psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL)
+#define psp_xgmi_set_topology_info(psp, num_device, topology) \
+ ((psp)->funcs->xgmi_set_topology_info ? \
+ (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL)
+
+#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
extern const struct amd_ip_funcs psp_ip_funcs;
@@ -159,5 +199,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
int psp_gpu_reset(struct amdgpu_device *adev);
+extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
#endif
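A purely illustrative sketch of how a caller could use the new XGMI wrappers declared above; the two-device hive size and the surrounding function are assumptions for illustration, not part of this patch.

static void example_query_xgmi(struct psp_context *psp)
{
	/* all-zero connection_mask: retrieve the topology without enabling links */
	struct psp_xgmi_topology_info topology = { 0 };
	uint64_t device_id = psp_xgmi_get_device_id(psp);
	uint64_t hive_id = psp_xgmi_get_hive_id(psp);

	if (!psp_xgmi_get_topology_info(psp, 2, &topology))
		DRM_INFO("xgmi device 0x%llx in hive 0x%llx, links 0x%x\n",
			 device_id, hive_id, topology.connection_mask);
}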
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
deleted file mode 100644
index 8af16e81c7d4..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright 2017 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Andres Rodriguez
- */
-
-#include "amdgpu.h"
-#include "amdgpu_ring.h"
-
-static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
- int hw_ip)
-{
- if (!mapper)
- return -EINVAL;
-
- if (hw_ip > AMDGPU_MAX_IP_NUM)
- return -EINVAL;
-
- mapper->hw_ip = hw_ip;
- mutex_init(&mapper->lock);
-
- memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
-
- return 0;
-}
-
-static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
- int ring)
-{
- return mapper->queue_map[ring];
-}
-
-static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
- int ring, struct amdgpu_ring *pring)
-{
- if (WARN_ON(mapper->queue_map[ring])) {
- DRM_ERROR("Un-expected ring re-map\n");
- return -EINVAL;
- }
-
- mapper->queue_map[ring] = pring;
-
- return 0;
-}
-
-static int amdgpu_identity_map(struct amdgpu_device *adev,
- struct amdgpu_queue_mapper *mapper,
- u32 ring,
- struct amdgpu_ring **out_ring)
-{
- u32 instance;
-
- switch (mapper->hw_ip) {
- case AMDGPU_HW_IP_GFX:
- *out_ring = &adev->gfx.gfx_ring[ring];
- break;
- case AMDGPU_HW_IP_COMPUTE:
- *out_ring = &adev->gfx.compute_ring[ring];
- break;
- case AMDGPU_HW_IP_DMA:
- *out_ring = &adev->sdma.instance[ring].ring;
- break;
- case AMDGPU_HW_IP_UVD:
- instance = ring;
- *out_ring = &adev->uvd.inst[instance].ring;
- break;
- case AMDGPU_HW_IP_VCE:
- *out_ring = &adev->vce.ring[ring];
- break;
- case AMDGPU_HW_IP_UVD_ENC:
- instance = ring / adev->uvd.num_enc_rings;
- *out_ring =
- &adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings];
- break;
- case AMDGPU_HW_IP_VCN_DEC:
- *out_ring = &adev->vcn.ring_dec;
- break;
- case AMDGPU_HW_IP_VCN_ENC:
- *out_ring = &adev->vcn.ring_enc[ring];
- break;
- default:
- *out_ring = NULL;
- DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
- return -EINVAL;
- }
-
- return amdgpu_update_cached_map(mapper, ring, *out_ring);
-}
-
-static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
-{
- switch (hw_ip) {
- case AMDGPU_HW_IP_GFX:
- return AMDGPU_RING_TYPE_GFX;
- case AMDGPU_HW_IP_COMPUTE:
- return AMDGPU_RING_TYPE_COMPUTE;
- case AMDGPU_HW_IP_DMA:
- return AMDGPU_RING_TYPE_SDMA;
- case AMDGPU_HW_IP_UVD:
- return AMDGPU_RING_TYPE_UVD;
- case AMDGPU_HW_IP_VCE:
- return AMDGPU_RING_TYPE_VCE;
- default:
- DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
- return -1;
- }
-}
-
-static int amdgpu_lru_map(struct amdgpu_device *adev,
- struct amdgpu_queue_mapper *mapper,
- u32 user_ring, bool lru_pipe_order,
- struct amdgpu_ring **out_ring)
-{
- int r, i, j;
- int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
- int ring_blacklist[AMDGPU_MAX_RINGS];
- struct amdgpu_ring *ring;
-
- /* 0 is a valid ring index, so initialize to -1 */
- memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
-
- for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
- ring = mapper->queue_map[i];
- if (ring)
- ring_blacklist[j++] = ring->idx;
- }
-
- r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
- j, lru_pipe_order, out_ring);
- if (r)
- return r;
-
- return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
-}
-
-/**
- * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- *
- * Initialize the the selected @mgr (all asics).
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
- struct amdgpu_queue_mgr *mgr)
-{
- int i, r;
-
- if (!adev || !mgr)
- return -EINVAL;
-
- memset(mgr, 0, sizeof(*mgr));
-
- for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
- r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
- if (r)
- return r;
- }
-
- return 0;
-}
-
-/**
- * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- *
- * De-initialize the the selected @mgr (all asics).
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
- struct amdgpu_queue_mgr *mgr)
-{
- return 0;
-}
-
-/**
- * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- * @hw_ip: HW IP enum
- * @instance: HW instance
- * @ring: user ring id
- * @our_ring: pointer to mapped amdgpu_ring
- *
- * Map a userspace ring id to an appropriate kernel ring. Different
- * policies are configurable at a HW IP level.
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
- struct amdgpu_queue_mgr *mgr,
- u32 hw_ip, u32 instance, u32 ring,
- struct amdgpu_ring **out_ring)
-{
- int r, ip_num_rings;
- struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
-
- if (!adev || !mgr || !out_ring)
- return -EINVAL;
-
- if (hw_ip >= AMDGPU_MAX_IP_NUM)
- return -EINVAL;
-
- if (ring >= AMDGPU_MAX_RINGS)
- return -EINVAL;
-
- /* Right now all IPs have only one instance - multiple rings. */
- if (instance != 0) {
- DRM_DEBUG("invalid ip instance: %d\n", instance);
- return -EINVAL;
- }
-
- switch (hw_ip) {
- case AMDGPU_HW_IP_GFX:
- ip_num_rings = adev->gfx.num_gfx_rings;
- break;
- case AMDGPU_HW_IP_COMPUTE:
- ip_num_rings = adev->gfx.num_compute_rings;
- break;
- case AMDGPU_HW_IP_DMA:
- ip_num_rings = adev->sdma.num_instances;
- break;
- case AMDGPU_HW_IP_UVD:
- ip_num_rings = adev->uvd.num_uvd_inst;
- break;
- case AMDGPU_HW_IP_VCE:
- ip_num_rings = adev->vce.num_rings;
- break;
- case AMDGPU_HW_IP_UVD_ENC:
- ip_num_rings =
- adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
- break;
- case AMDGPU_HW_IP_VCN_DEC:
- ip_num_rings = 1;
- break;
- case AMDGPU_HW_IP_VCN_ENC:
- ip_num_rings = adev->vcn.num_enc_rings;
- break;
- default:
- DRM_DEBUG("unknown ip type: %d\n", hw_ip);
- return -EINVAL;
- }
-
- if (ring >= ip_num_rings) {
- DRM_DEBUG("Ring index:%d exceeds maximum:%d for ip:%d\n",
- ring, ip_num_rings, hw_ip);
- return -EINVAL;
- }
-
- mutex_lock(&mapper->lock);
-
- *out_ring = amdgpu_get_cached_map(mapper, ring);
- if (*out_ring) {
- /* cache hit */
- r = 0;
- goto out_unlock;
- }
-
- switch (mapper->hw_ip) {
- case AMDGPU_HW_IP_GFX:
- case AMDGPU_HW_IP_UVD:
- case AMDGPU_HW_IP_VCE:
- case AMDGPU_HW_IP_UVD_ENC:
- case AMDGPU_HW_IP_VCN_DEC:
- case AMDGPU_HW_IP_VCN_ENC:
- r = amdgpu_identity_map(adev, mapper, ring, out_ring);
- break;
- case AMDGPU_HW_IP_DMA:
- r = amdgpu_lru_map(adev, mapper, ring, false, out_ring);
- break;
- case AMDGPU_HW_IP_COMPUTE:
- r = amdgpu_lru_map(adev, mapper, ring, true, out_ring);
- break;
- default:
- *out_ring = NULL;
- r = -EINVAL;
- DRM_DEBUG("unknown HW IP type: %d\n", mapper->hw_ip);
- }
-
-out_unlock:
- mutex_unlock(&mapper->lock);
- return r;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index c6850b629d0e..b70e85ec147d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -135,9 +135,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
if (ring->funcs->end_use)
ring->funcs->end_use(ring);
-
- if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)
- amdgpu_ring_lru_touch(ring->adev, ring);
}
/**
@@ -211,7 +208,8 @@ void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
if (!ring->funcs->set_priority)
return;
- atomic_inc(&ring->num_jobs[priority]);
+ if (atomic_inc_return(&ring->num_jobs[priority]) <= 0)
+ return;
mutex_lock(&ring->priority_mutex);
if (priority <= ring->priority)
@@ -304,7 +302,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
0xffffffffffffffff : ring->buf_mask;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
- r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,
@@ -319,8 +317,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->max_dw = max_dw;
ring->priority = DRM_SCHED_PRIORITY_NORMAL;
mutex_init(&ring->priority_mutex);
- INIT_LIST_HEAD(&ring->lru_list);
- amdgpu_ring_lru_touch(adev, ring);
for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i)
atomic_set(&ring->num_jobs[i], 0);
@@ -367,99 +363,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
ring->adev->rings[ring->idx] = NULL;
}
-static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
- struct amdgpu_ring *ring)
-{
- /* list_move_tail handles the case where ring isn't part of the list */
- list_move_tail(&ring->lru_list, &adev->ring_lru_list);
-}
-
-static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
- int *blacklist, int num_blacklist)
-{
- int i;
-
- for (i = 0; i < num_blacklist; i++) {
- if (ring->idx == blacklist[i])
- return true;
- }
-
- return false;
-}
-
-/**
- * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
- *
- * @adev: amdgpu_device pointer
- * @type: amdgpu_ring_type enum
- * @blacklist: blacklisted ring ids array
- * @num_blacklist: number of entries in @blacklist
- * @lru_pipe_order: find a ring from the least recently used pipe
- * @ring: output ring
- *
- * Retrieve the amdgpu_ring structure for the least recently used ring of
- * a specific IP block (all asics).
- * Returns 0 on success, error on failure.
- */
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
- int *blacklist, int num_blacklist,
- bool lru_pipe_order, struct amdgpu_ring **ring)
-{
- struct amdgpu_ring *entry;
-
- /* List is sorted in LRU order, find first entry corresponding
- * to the desired HW IP */
- *ring = NULL;
- spin_lock(&adev->ring_lru_list_lock);
- list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
- if (entry->funcs->type != type)
- continue;
-
- if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
- continue;
-
- if (!*ring) {
- *ring = entry;
-
- /* We are done for ring LRU */
- if (!lru_pipe_order)
- break;
- }
-
- /* Move all rings on the same pipe to the end of the list */
- if (entry->pipe == (*ring)->pipe)
- amdgpu_ring_lru_touch_locked(adev, entry);
- }
-
- /* Move the ring we found to the end of the list */
- if (*ring)
- amdgpu_ring_lru_touch_locked(adev, *ring);
-
- spin_unlock(&adev->ring_lru_list_lock);
-
- if (!*ring) {
- DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * amdgpu_ring_lru_touch - mark a ring as recently being used
- *
- * @adev: amdgpu_device pointer
- * @ring: ring to touch
- *
- * Move @ring to the tail of the lru list
- */
-void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
-{
- spin_lock(&adev->ring_lru_list_lock);
- amdgpu_ring_lru_touch_locked(adev, ring);
- spin_unlock(&adev->ring_lru_list_lock);
-}
-
/**
* amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
*
@@ -480,6 +383,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
+/**
+ * amdgpu_ring_soft_recovery - try to soft recover a ring lockup
+ *
+ * @ring: ring to try the recovery on
+ * @vmid: VMID we try to get going again
+ * @fence: timedout fence
+ *
+ * Tries to get a ring proceeding again when it is stuck.
+ */
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+ struct dma_fence *fence)
+{
+ ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+
+ if (!ring->funcs->soft_recovery)
+ return false;
+
+ atomic_inc(&ring->adev->gpu_reset_counter);
+ while (!dma_fence_is_signaled(fence) &&
+ ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0)
+ ring->funcs->soft_recovery(ring, vmid);
+
+ return dma_fence_is_signaled(fence);
+}
+
/*
* Debugfs info
*/
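A hedged sketch of how a job timeout handler might try the new soft recovery helper before escalating to a full GPU reset. The call site and the to_amdgpu_job()/s_fence->parent plumbing are assumptions about surrounding driver code, not part of this hunk.

static void example_job_timedout(struct drm_sched_job *s_job)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
	struct amdgpu_job *job = to_amdgpu_job(s_job);	/* assumed helper */

	/* Poke the ring for up to 10ms before resorting to a full reset. */
	if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
		DRM_ERROR("ring %s timeout, but soft recovered\n",
			  s_job->sched->name);
		return;
	}

	/* otherwise fall back to the heavyweight GPU recovery path */
}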
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1513124c5659..4caa301ce454 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -44,6 +44,8 @@
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
+#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
+
enum amdgpu_ring_type {
AMDGPU_RING_TYPE_GFX,
AMDGPU_RING_TYPE_COMPUTE,
@@ -53,7 +55,8 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_KIQ,
AMDGPU_RING_TYPE_UVD_ENC,
AMDGPU_RING_TYPE_VCN_DEC,
- AMDGPU_RING_TYPE_VCN_ENC
+ AMDGPU_RING_TYPE_VCN_ENC,
+ AMDGPU_RING_TYPE_VCN_JPEG
};
struct amdgpu_device;
@@ -94,7 +97,7 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
unsigned flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
-void amdgpu_fence_process(struct amdgpu_ring *ring);
+bool amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
uint32_t wait_seq,
@@ -112,6 +115,7 @@ struct amdgpu_ring_funcs {
u32 nop;
bool support_64bit_ptrs;
unsigned vmhub;
+ unsigned extra_dw;
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
@@ -119,6 +123,7 @@ struct amdgpu_ring_funcs {
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+ int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;
@@ -163,6 +168,8 @@ struct amdgpu_ring_funcs {
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring,
enum drm_sched_priority priority);
+ /* Try to soft recover the ring to make the fence signal */
+ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
};
struct amdgpu_ring {
@@ -170,7 +177,6 @@ struct amdgpu_ring {
const struct amdgpu_ring_funcs *funcs;
struct amdgpu_fence_driver fence_drv;
struct drm_gpu_scheduler sched;
- struct list_head lru_list;
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
@@ -216,6 +222,30 @@ struct amdgpu_ring {
#endif
};
+#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
+#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
+#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
+#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
+#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
+#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
+#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
+#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
+#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
+#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
+#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
+#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
+#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
+#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
+#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
+#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
@@ -229,13 +259,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned ring_size, struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
- int *blacklist, int num_blacklist,
- bool lru_pipe_order, struct amdgpu_ring **ring);
-void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
uint32_t reg0, uint32_t val0,
uint32_t reg1, uint32_t val1);
+bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
+ struct dma_fence *fence);
static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index fb1667b35daa..12f2bf97611f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -226,6 +226,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
struct amdgpu_sa_bo *sa_bo;
+ fences[i] = NULL;
+
if (list_empty(&sa_manager->flist[i]))
continue;
@@ -296,10 +298,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
spin_lock(&sa_manager->wq.lock);
do {
- for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
- fences[i] = NULL;
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
tries[i] = 0;
- }
do {
amdgpu_sa_bo_try_free(sa_manager);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 86a0715d9431..1cafe8d83a4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -53,9 +53,8 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
int fd,
enum drm_sched_priority priority)
{
- struct file *filp = fcheck(fd);
+ struct file *filp = fget(fd);
struct drm_file *file;
- struct pid *pid;
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
uint32_t id;
@@ -63,20 +62,12 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
if (!filp)
return -EINVAL;
- pid = get_pid(((struct drm_file *)filp->private_data)->pid);
+ file = filp->private_data;
+ fpriv = file->driver_priv;
+ idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
+ amdgpu_ctx_priority_override(ctx, priority);
- mutex_lock(&adev->ddev->filelist_mutex);
- list_for_each_entry(file, &adev->ddev->filelist, lhead) {
- if (file->pid != pid)
- continue;
-
- fpriv = file->driver_priv;
- idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
- amdgpu_ctx_priority_override(ctx, priority);
- }
- mutex_unlock(&adev->ddev->filelist_mutex);
-
- put_pid(pid);
+ fput(filp);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
new file mode 100644
index 000000000000..bc9244b429ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_sdma.h"
+
+/*
+ * GPU SDMA IP block helper functions.
+ */
+
+struct amdgpu_sdma_instance *amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int i;
+
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ if (&adev->sdma.instance[i].ring == ring)
+ break;
+
+ if (i < adev->sdma.num_instances)
+ return &adev->sdma.instance[i];
+ else
+ return NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
new file mode 100644
index 000000000000..500113ec65ca
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_SDMA_H__
+#define __AMDGPU_SDMA_H__
+
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES 2
+
+enum amdgpu_sdma_irq {
+ AMDGPU_SDMA_IRQ_TRAP0 = 0,
+ AMDGPU_SDMA_IRQ_TRAP1,
+
+ AMDGPU_SDMA_IRQ_LAST
+};
+
+struct amdgpu_sdma_instance {
+ /* SDMA firmware */
+ const struct firmware *fw;
+ uint32_t fw_version;
+ uint32_t feature_version;
+
+ struct amdgpu_ring ring;
+ bool burst_nop;
+};
+
+struct amdgpu_sdma {
+ struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_irq_src trap_irq;
+ struct amdgpu_irq_src illegal_inst_irq;
+ int num_instances;
+ uint32_t srbm_soft_reset;
+};
+
+/*
+ * Provided by hw blocks that can move/clear data, e.g. gfx or sdma.
+ * Currently, sdma is used to move data.
+ */
+struct amdgpu_buffer_funcs {
+ /* maximum bytes in a single operation */
+ uint32_t copy_max_bytes;
+
+ /* number of dw to reserve per operation */
+ unsigned copy_num_dw;
+
+ /* used for buffer migration */
+ void (*emit_copy_buffer)(struct amdgpu_ib *ib,
+ /* src addr in bytes */
+ uint64_t src_offset,
+ /* dst addr in bytes */
+ uint64_t dst_offset,
+ /* number of byte to transfer */
+ uint32_t byte_count);
+
+ /* maximum bytes in a single operation */
+ uint32_t fill_max_bytes;
+
+ /* number of dw to reserve per operation */
+ unsigned fill_num_dw;
+
+ /* used for buffer clearing */
+ void (*emit_fill_buffer)(struct amdgpu_ib *ib,
+ /* value to write to memory */
+ uint32_t src_data,
+ /* dst addr in bytes */
+ uint64_t dst_offset,
+ /* number of byte to fill */
+ uint32_t byte_count);
+};
+
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
+
+struct amdgpu_sdma_instance *
+amdgpu_get_sdma_instance(struct amdgpu_ring *ring);
+
+#endif
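As a small usage sketch for the helper declared above, an SDMA ring callback could look up its own instance like this, for example to check whether burst NOPs are supported before padding an IB; the function name here is hypothetical.

static bool example_sdma_supports_burst_nop(struct amdgpu_ring *ring)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);

	return sdma && sdma->burst_nop;
}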
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index e3878256743a..8904e62dca7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright 2009 VMware, Inc.
*
@@ -75,11 +76,12 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(vram_obj, false);
if (unlikely(r != 0))
goto out_unref;
- r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
+ r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
DRM_ERROR("Failed to pin VRAM object\n");
goto out_unres;
}
+ vram_addr = amdgpu_bo_gpu_offset(vram_obj);
for (i = 0; i < n; i++) {
void *gtt_map, *vram_map;
void **gart_start, **gart_end;
@@ -96,11 +98,17 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(gtt_obj[i], false);
if (unlikely(r != 0))
goto out_lclean_unref;
- r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr);
+ r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT);
if (r) {
DRM_ERROR("Failed to pin GTT object %d\n", i);
goto out_lclean_unres;
}
+ r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo);
+ if (r) {
+ DRM_ERROR("%p bind failed\n", gtt_obj[i]);
+ goto out_lclean_unpin;
+ }
+ gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]);
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index e96e26d3f3b0..e9bf70e2ac51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -103,7 +103,7 @@ TRACE_EVENT(amdgpu_iv,
__entry->src_data[2] = iv->src_data[2];
__entry->src_data[3] = iv->src_data[3];
),
- TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
+ TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
__entry->client_id, __entry->src_id,
__entry->ring_id, __entry->vmid,
__entry->timestamp, __entry->pasid,
@@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
- __entry->ring = p->job->ring->idx;
+ __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
__entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted(
- p->job->ring);
+ to_amdgpu_ring(p->entity->rq->sched));
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
@@ -178,7 +178,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = job->ring->name;
+ __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@@ -203,7 +203,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __entry->ring_name = job->ring->name;
+ __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@@ -314,6 +314,11 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping,
TP_ARGS(mapping)
);
+DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
+ TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
+ TP_ARGS(mapping)
+);
+
TRACE_EVENT(amdgpu_vm_set_ptes,
TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
uint32_t incr, uint64_t flags),
@@ -436,7 +441,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,
__entry->total_bo, __entry->total_size)
);
-TRACE_EVENT(amdgpu_ttm_bo_move,
+TRACE_EVENT(amdgpu_bo_move,
TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
TP_ARGS(bo, new_placement, old_placement),
TP_STRUCT__entry(
@@ -457,6 +462,30 @@ TRACE_EVENT(amdgpu_ttm_bo_move,
__entry->new_placement, __entry->bo_size)
);
+TRACE_EVENT(amdgpu_ib_pipe_sync,
+ TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence),
+ TP_ARGS(sched_job, fence),
+ TP_STRUCT__entry(
+ __field(const char *,name)
+ __field(uint64_t, id)
+ __field(struct dma_fence *, fence)
+ __field(uint64_t, ctx)
+ __field(unsigned, seqno)
+ ),
+
+ TP_fast_assign(
+ __entry->name = sched_job->base.sched->name;
+ __entry->id = sched_job->base.id;
+ __entry->fence = fence;
+ __entry->ctx = fence->context;
+ __entry->seqno = fence->seqno;
+ ),
+ TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u",
+ __entry->name, __entry->id,
+ __entry->fence, __entry->ctx,
+ __entry->seqno)
+);
+
#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
index b160b958e5fe..f212402570a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: MIT
/* Copyright Red Hat Inc 2010.
*
* Permission is hereby granted, free of charge, to any person obtaining a
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e93a0a237dc3..a44fc12ae1f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -47,6 +47,7 @@
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_sdma.h"
#include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -92,11 +93,9 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
}
/**
- * amdgpu_ttm_global_init - Initialize global TTM memory reference
- * structures.
+ * amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
*
- * @adev: AMDGPU device for which the global structures need to be
- * registered.
+ * @adev: AMDGPU device for which the global structures need to be registered.
*
* This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
* during bring up.
@@ -104,8 +103,6 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
- struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
int r;
/* ensure reference is false in case init fails */
@@ -138,21 +135,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
- ring = adev->mman.buffer_funcs_ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
- r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
- rq, NULL);
- if (r) {
- DRM_ERROR("Failed setting up TTM BO move run queue.\n");
- goto error_entity;
- }
-
adev->mman.mem_global_referenced = true;
return 0;
-error_entity:
- drm_global_item_unref(&adev->mman.bo_global_ref.ref);
error_bo:
drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
@@ -162,8 +148,6 @@ error_mem:
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
if (adev->mman.mem_global_referenced) {
- drm_sched_entity_fini(adev->mman.entity.sched,
- &adev->mman.entity);
mutex_destroy(&adev->mman.gtt_window_lock);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
drm_global_item_unref(&adev->mman.mem_global_ref);
@@ -177,13 +161,12 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
}
/**
- * amdgpu_init_mem_type - Initialize a memory manager for a specific
- * type of memory request.
+ * amdgpu_init_mem_type - Initialize a memory manager for a specific type of
+ * memory request.
*
- * @bdev: The TTM BO device object (contains a reference to
- * amdgpu_device)
- * @type: The type of memory requested
- * @man:
+ * @bdev: The TTM BO device object (contains a reference to amdgpu_device)
+ * @type: The type of memory requested
+ * @man: The memory type manager for each domain
*
* This is called by ttm_bo_init_mm() when a buffer object is being
* initialized.
@@ -263,7 +246,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
}
/* Object isn't an AMDGPU object so ignore */
- if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
+ if (!amdgpu_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
placement->busy_placement = &placements;
placement->num_placement = 1;
@@ -273,11 +256,18 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) {
+ case AMDGPU_PL_GDS:
+ case AMDGPU_PL_GWS:
+ case AMDGPU_PL_OA:
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ return;
+
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
- amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
- } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
amdgpu_bo_in_cpu_visible_vram(abo)) {
@@ -286,7 +276,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* BO will be evicted to GTT rather than causing other
* BOs to be evicted from VRAM
*/
- amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT);
abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
@@ -294,12 +284,13 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
abo->placement.num_busy_placement = 1;
} else {
/* Move to GTT memory */
- amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
}
break;
case TTM_PL_TT:
default:
- amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+ break;
}
*placement = abo->placement;
}
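The GDS/GWS/OA cases above are the new part: those on-chip blocks carry nothing worth copying out, so eviction gets zero placements. A condensed, stand-alone sketch of that decision follows; the enum values and the collapsed VRAM/TT handling are stand-ins, not the driver's types.

#include <stdio.h>

/* Stand-in memory types, illustrative only. */
enum mem_type { PL_VRAM, PL_TT, PL_GDS, PL_GWS, PL_OA };

/* How many eviction placements a BO of the given type gets: on-chip
 * GDS/GWS/OA blocks carry nothing worth saving, so they get none. */
static int evict_placements(enum mem_type t)
{
    switch (t) {
    case PL_GDS:
    case PL_GWS:
    case PL_OA:
        return 0;     /* nothing to copy out, just drop the contents */
    case PL_VRAM:
    case PL_TT:
    default:
        return 1;     /* evicted somewhere (GTT or system memory) */
    }
}

int main(void)
{
    printf("GDS eviction placements: %d\n", evict_placements(PL_GDS));
    printf("VRAM eviction placements: %d\n", evict_placements(PL_VRAM));
    return 0;
}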
@@ -307,8 +298,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/**
* amdgpu_verify_access - Verify access for a mmap call
*
- * @bo: The buffer object to map
- * @filp: The file pointer from the process performing the mmap
+ * @bo: The buffer object to map
+ * @filp: The file pointer from the process performing the mmap
*
* This is called by ttm_bo_mmap() to verify whether a process
* has the right to mmap a BO to their process space.
@@ -333,11 +324,10 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
/**
* amdgpu_move_null - Register memory for a buffer object
*
- * @bo: The bo to assign the memory to
- * @new_mem: The memory to be assigned.
+ * @bo: The bo to assign the memory to
+ * @new_mem: The memory to be assigned.
*
- * Assign the memory from new_mem to the memory of the buffer object
- * bo.
+ * Assign the memory from new_mem to the memory of the buffer object bo.
*/
static void amdgpu_move_null(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
@@ -350,8 +340,12 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
}
/**
- * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT
- * buffer.
+ * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT buffer.
+ *
+ * @bo: The buffer object to compute the offset for.
+ * @mm_node: Memory manager node for drm allocator.
+ * @mem: The region where the bo resides.
+ *
*/
static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
struct drm_mm_node *mm_node,
@@ -359,7 +353,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
{
uint64_t addr = 0;
- if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) {
+ if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
addr = mm_node->start << PAGE_SHIFT;
addr += bo->bdev->man[mem->mem_type].gpu_offset;
}
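The address computation is just the node's page index scaled to bytes plus the base offset of the memory domain it lives in. A small arithmetic sketch with made-up PAGE_SHIFT, node start and domain offset values; representing AMDGPU_BO_INVALID_OFFSET as ~0 here is an assumption for the sake of the example.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12              /* 4 KiB pages, as on x86 */
#define BO_INVALID_OFFSET (~0ULL)  /* stand-in for AMDGPU_BO_INVALID_OFFSET */

/* GPU-relative address of a drm_mm node: page index scaled to bytes plus
 * the base offset of the memory domain the node lives in. */
static uint64_t node_gpu_addr(uint64_t node_start, uint64_t domain_gpu_offset)
{
    if (node_start == BO_INVALID_OFFSET)
        return 0;   /* not placed yet, no GART/VRAM address */
    return (node_start << PAGE_SHIFT) + domain_gpu_offset;
}

int main(void)
{
    /* node at page 0x100 in a domain that starts at 256 MiB */
    printf("0x%llx\n",
           (unsigned long long)node_gpu_addr(0x100, 256ULL << 20));
    return 0;
}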
@@ -367,10 +361,12 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
}
/**
- * amdgpu_find_mm_node - Helper function finds the drm_mm_node
- * corresponding to @offset. It also modifies
- * the offset to be within the drm_mm_node
- * returned
+ * amdgpu_find_mm_node - Helper function that finds the drm_mm_node corresponding to
+ * @offset. It also modifies the offset to be within the drm_mm_node returned
+ *
+ * @mem: The region where the bo resides.
+ * @offset: The offset to look up; adjusted on return to be relative to the returned drm_mm_node.
+ *
*/
static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
unsigned long *offset)
@@ -445,8 +441,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
/* Map only what needs to be accessed. Map src to window 0 and
* dst to window 1
*/
- if (src->mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_has_gart_addr(src->mem)) {
+ if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) {
r = amdgpu_map_buffer(src->bo, src->mem,
PFN_UP(cur_size + src_page_offset),
src_node_start, 0, ring,
@@ -459,8 +454,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
from += src_page_offset;
}
- if (dst->mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) {
+ if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) {
r = amdgpu_map_buffer(dst->bo, dst->mem,
PFN_UP(cur_size + dst_page_offset),
dst_node_start, 1, ring,
@@ -512,8 +506,8 @@ error:
/**
* amdgpu_move_blit - Copy an entire buffer to another buffer
*
- * This is a helper called by amdgpu_bo_move() and
- * amdgpu_move_vram_ram() to help move buffers to and from VRAM.
+ * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
+ * help move buffers to and from VRAM.
*/
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
@@ -538,7 +532,11 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
if (r)
goto error;
- r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
+ /* Always block for VM page tables before committing the new location */
+ if (bo->type == ttm_bo_type_kernel)
+ r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem);
+ else
+ r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
dma_fence_put(fence);
return r;
@@ -595,7 +593,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
}
/* blit VRAM to GTT */
- r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
+ r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
@@ -647,7 +645,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
}
/* copy to VRAM */
- r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
+ r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
@@ -689,6 +687,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
amdgpu_move_null(bo, new_mem);
return 0;
}
+ if (old_mem->mem_type == AMDGPU_PL_GDS ||
+ old_mem->mem_type == AMDGPU_PL_GWS ||
+ old_mem->mem_type == AMDGPU_PL_OA ||
+ new_mem->mem_type == AMDGPU_PL_GDS ||
+ new_mem->mem_type == AMDGPU_PL_GWS ||
+ new_mem->mem_type == AMDGPU_PL_OA) {
+ /* Nothing to save here */
+ amdgpu_move_null(bo, new_mem);
+ return 0;
+ }
if (!adev->mman.buffer_funcs_enabled)
goto memcpy;
@@ -809,8 +817,8 @@ struct amdgpu_ttm_tt {
};
/**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to
- * by a USERPTR pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
+ * pointer to memory
*
* Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
* This provides a wrapper around the get_user_pages() call to provide
@@ -833,8 +841,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /* check that we only use anonymous memory
- to prevent problems with writeback */
+ /*
+ * check that we only use anonymous memory to prevent problems
+ * with writeback
+ */
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
@@ -885,10 +895,9 @@ release_pages:
}
/**
- * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages
- * as necessary.
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
*
- * Called by amdgpu_cs_list_validate(). This creates the page list
+ * Called by amdgpu_cs_list_validate(). This creates the page list
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
@@ -930,8 +939,7 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
}
/**
- * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the
- * user pages
+ * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
*
* Called by amdgpu_ttm_backend_bind()
**/
@@ -1095,42 +1103,48 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
struct ttm_mem_reg tmp;
struct ttm_placement placement;
struct ttm_place placements;
- uint64_t flags;
+ uint64_t addr, flags;
int r;
- if (bo->mem.mem_type != TTM_PL_TT ||
- amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
+ if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
return 0;
- /* allocate GTT space */
- tmp = bo->mem;
- tmp.mm_node = NULL;
- placement.num_placement = 1;
- placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
- placements.fpfn = 0;
- placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
- placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
- TTM_PL_FLAG_TT;
+ addr = amdgpu_gmc_agp_addr(bo);
+ if (addr != AMDGPU_BO_INVALID_OFFSET) {
+ bo->mem.start = addr >> PAGE_SHIFT;
+ } else {
- r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
- if (unlikely(r))
- return r;
+ /* allocate GART space */
+ tmp = bo->mem;
+ tmp.mm_node = NULL;
+ placement.num_placement = 1;
+ placement.placement = &placements;
+ placement.num_busy_placement = 1;
+ placement.busy_placement = &placements;
+ placements.fpfn = 0;
+ placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
+ placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
+ TTM_PL_FLAG_TT;
+
+ r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
+ if (unlikely(r))
+ return r;
- /* compute PTE flags for this buffer object */
- flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+ /* compute PTE flags for this buffer object */
+ flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
- /* Bind pages */
- gtt->offset = (u64)tmp.start << PAGE_SHIFT;
- r = amdgpu_ttm_gart_bind(adev, bo, flags);
- if (unlikely(r)) {
- ttm_bo_mem_put(bo, &tmp);
- return r;
+ /* Bind pages */
+ gtt->offset = (u64)tmp.start << PAGE_SHIFT;
+ r = amdgpu_ttm_gart_bind(adev, bo, flags);
+ if (unlikely(r)) {
+ ttm_bo_mem_put(bo, &tmp);
+ return r;
+ }
+
+ ttm_bo_mem_put(bo, &bo->mem);
+ bo->mem = tmp;
}
- ttm_bo_mem_put(bo, &bo->mem);
- bo->mem = tmp;
bo->offset = (bo->mem.start << PAGE_SHIFT) +
bo->bdev->man[bo->mem.mem_type].gpu_offset;
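The new flow tries a direct AGP aperture address first and only falls back to allocating and binding a GART window when none is available. Below is a stand-alone sketch of that control flow; get_agp_addr() and alloc_and_bind_gart() are invented stand-ins for amdgpu_gmc_agp_addr() and the ttm_bo_mem_space()/amdgpu_ttm_gart_bind() path, not real driver entry points.

#include <stdio.h>
#include <stdint.h>

#define INVALID_OFFSET (~0ULL)
#define PAGE_SHIFT 12

/* Stand-in: returns a direct aperture address or INVALID_OFFSET when the
 * BO cannot use the AGP window. */
static uint64_t get_agp_addr(int has_agp)
{
    return has_agp ? (1ULL << 32) : INVALID_OFFSET;
}

/* Stand-in for the allocate-GART-space-and-bind fallback. */
static int alloc_and_bind_gart(uint64_t *start)
{
    *start = 0x1000;
    return 0;
}

/* Prefer a direct AGP aperture address; fall back to a GART window. */
static int place_for_gpu_access(int has_agp, uint64_t *start)
{
    uint64_t addr = get_agp_addr(has_agp);

    if (addr != INVALID_OFFSET) {
        *start = addr >> PAGE_SHIFT;
        return 0;
    }
    return alloc_and_bind_gart(start);
}

int main(void)
{
    uint64_t start;

    place_for_gpu_access(1, &start);
    printf("AGP path start page: 0x%llx\n", (unsigned long long)start);
    place_for_gpu_access(0, &start);
    printf("GART path start page: 0x%llx\n", (unsigned long long)start);
    return 0;
}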
@@ -1310,8 +1324,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
}
/**
- * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt
- * for the current task
+ * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
+ * task
*
* @ttm: The ttm_tt object to bind this userptr object to
* @addr: The address in the current tasks VM space to use
@@ -1361,9 +1375,8 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
}
/**
- * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays
- * inside an address range for the
- * current task.
+ * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an
+ * address range for the current task.
*
*/
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
@@ -1401,8 +1414,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
}
/**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been
- * invalidated?
+ * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
*/
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated)
@@ -1415,10 +1427,8 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
}
/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this
- * ttm_tt object been invalidated
- * since the last time they've
- * been set?
+ * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
+ * been invalidated since the last time they've been set?
*/
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
{
@@ -1444,13 +1454,14 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
}
/**
- * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
*
* @ttm: The ttm_tt object to compute the flags for
* @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PDE (Page Directory Entry).
*/
-uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
- struct ttm_mem_reg *mem)
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
{
uint64_t flags = 0;
@@ -1464,6 +1475,22 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
flags |= AMDGPU_PTE_SNOOPED;
}
+ return flags;
+}
+
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory registry backing this ttm_tt object
+ *
+ * Figure out the flags to use for a VM PTE (Page Table Entry).
+ */
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+ struct ttm_mem_reg *mem)
+{
+ uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);
+
flags |= adev->gart.gart_pte_flags;
flags |= AMDGPU_PTE_READABLE;
@@ -1474,13 +1501,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
}
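The split above makes the PTE flags a superset of the PDE flags: location-dependent bits first, then the per-ASIC GART bits and the access rights. A rough illustration with made-up bit positions; the read-only handling is assumed from the surrounding code, which this hunk does not show.

#include <stdio.h>
#include <stdint.h>

/* Made-up bit positions, for illustration only. */
#define PTE_SYSTEM    (1ULL << 1)
#define PTE_SNOOPED   (1ULL << 2)
#define PTE_READABLE  (1ULL << 5)
#define PTE_WRITEABLE (1ULL << 6)

/* PDE-level flags depend only on where the memory lives. */
static uint64_t pde_flags(int in_system_memory, int cached)
{
    uint64_t flags = 0;

    if (in_system_memory) {
        flags |= PTE_SYSTEM;
        if (cached)
            flags |= PTE_SNOOPED;
    }
    return flags;
}

/* PTE-level flags: PDE flags plus per-ASIC GART bits and access rights. */
static uint64_t pte_flags(int in_system_memory, int cached,
                          uint64_t gart_pte_flags, int readonly)
{
    uint64_t flags = pde_flags(in_system_memory, cached);

    flags |= gart_pte_flags;
    flags |= PTE_READABLE;
    if (!readonly)
        flags |= PTE_WRITEABLE;
    return flags;
}

int main(void)
{
    printf("0x%llx\n", (unsigned long long)pte_flags(1, 1, 1ULL << 0, 0));
    return 0;
}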
/**
- * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict
- * a buffer object.
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
+ * object.
*
- * Return true if eviction is sensible. Called by
- * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
- * which tries to evict buffer objects until it can find space
- * for a new object and by ttm_bo_force_list_clean() which is
+ * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
+ * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
+ * it can find space for a new object and by ttm_bo_force_list_clean() which is
* used to clean out a memory space.
*/
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
@@ -1530,8 +1556,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
}
/**
- * amdgpu_ttm_access_memory - Read or Write memory that backs a
- * buffer object.
+ * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
*
* @bo: The buffer object to read/write
* @offset: Offset into buffer object
@@ -1695,7 +1720,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
AMDGPU_GEM_DOMAIN_VRAM,
adev->fw_vram_usage.start_offset,
(adev->fw_vram_usage.start_offset +
- adev->fw_vram_usage.size), NULL);
+ adev->fw_vram_usage.size));
if (r)
goto error_pin;
r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
@@ -1719,8 +1744,8 @@ error_create:
return r;
}
/**
- * amdgpu_ttm_init - Init the memory management (ttm) as well as
- * various gtt/vram related fields.
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as various
+ * gtt/vram related fields.
*
* This initializes all of the memory space pools that the TTM layer
* will need such as the GTT space (system memory mapped to the device),
@@ -1788,14 +1813,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
* This is used for VGA emulation and pre-OS scanout buffers to
* avoid display artifacts while transitioning between pre-OS
* and driver. */
- if (adev->gmc.stolen_size) {
- r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->stolen_vga_memory,
- NULL, NULL);
- if (r)
- return r;
- }
+ r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->stolen_vga_memory,
+ NULL, NULL);
+ if (r)
+ return r;
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@@ -1822,45 +1845,45 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
(unsigned)(gtt_size / (1024 * 1024)));
/* Initialize various on-chip memory pools */
- adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
- adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
- adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
- adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
- adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
- adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
- adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
- adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
- adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
- /* GDS Memory */
- if (adev->gds.mem.total_size) {
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
- adev->gds.mem.total_size >> PAGE_SHIFT);
- if (r) {
- DRM_ERROR("Failed initializing GDS heap.\n");
- return r;
- }
+ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
+ adev->gds.mem.total_size);
+ if (r) {
+ DRM_ERROR("Failed initializing GDS heap.\n");
+ return r;
}
- /* GWS */
- if (adev->gds.gws.total_size) {
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
- adev->gds.gws.total_size >> PAGE_SHIFT);
- if (r) {
- DRM_ERROR("Failed initializing gws heap.\n");
- return r;
- }
+ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
+ &adev->gds.gds_gfx_bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
+ adev->gds.gws.total_size);
+ if (r) {
+ DRM_ERROR("Failed initializing gws heap.\n");
+ return r;
}
- /* OA */
- if (adev->gds.oa.total_size) {
- r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
- adev->gds.oa.total_size >> PAGE_SHIFT);
- if (r) {
- DRM_ERROR("Failed initializing oa heap.\n");
- return r;
- }
+ r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
+ &adev->gds.gws_gfx_bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
+ adev->gds.oa.total_size);
+ if (r) {
+ DRM_ERROR("Failed initializing oa heap.\n");
+ return r;
}
+ r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
+ PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
+ &adev->gds.oa_gfx_bo, NULL, NULL);
+ if (r)
+ return r;
+
/* Register debugfs entries for amdgpu_ttm */
r = amdgpu_ttm_debugfs_init(adev);
if (r) {
@@ -1871,8 +1894,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
/**
- * amdgpu_ttm_late_init - Handle any late initialization for
- * amdgpu_ttm
+ * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm
*/
void amdgpu_ttm_late_init(struct amdgpu_device *adev)
{
@@ -1896,12 +1918,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
- if (adev->gds.mem.total_size)
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
- if (adev->gds.gws.total_size)
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
- if (adev->gds.oa.total_size)
- ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
+ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS);
+ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS);
+ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_bo_device_release(&adev->mman.bdev);
amdgpu_ttm_global_fini(adev);
adev->mman.initialized = false;
@@ -1921,10 +1940,30 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
uint64_t size;
+ int r;
- if (!adev->mman.initialized || adev->in_gpu_reset)
+ if (!adev->mman.initialized || adev->in_gpu_reset ||
+ adev->mman.buffer_funcs_enabled == enable)
return;
+ if (enable) {
+ struct amdgpu_ring *ring;
+ struct drm_sched_rq *rq;
+
+ ring = adev->mman.buffer_funcs_ring;
+ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
+ r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
+ r);
+ return;
+ }
+ } else {
+ drm_sched_entity_destroy(&adev->mman.entity);
+ dma_fence_put(man->move);
+ man->move = NULL;
+ }
+
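The entity for kernel BO moves is now created and destroyed here rather than in the global init/fini path, and the function bails out early when the requested state already matches. A tiny stand-alone analogue of that toggle; the struct and field names below are invented.

#include <stdio.h>
#include <stdbool.h>

/* Minimal analogue: skip when the state already matches, set up on
 * enable, tear down on disable. */
struct mover {
    bool enabled;
    bool has_entity;
};

static void set_enabled(struct mover *m, bool enable)
{
    if (m->enabled == enable)        /* already in the requested state */
        return;

    if (enable)
        m->has_entity = true;        /* drm_sched_entity_init() in the driver */
    else
        m->has_entity = false;       /* drm_sched_entity_destroy() + fence put */
    m->enabled = enable;
}

int main(void)
{
    struct mover m = { false, false };

    set_enabled(&m, true);
    set_enabled(&m, true);           /* no-op, state unchanged */
    set_enabled(&m, false);
    printf("enabled=%d entity=%d\n", m.enabled, m.has_entity);
    return 0;
}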
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
if (enable)
size = adev->gmc.real_vram_size;
@@ -1987,7 +2026,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
src_addr = num_dw * 4;
src_addr += job->ibs[0].gpu_addr;
- dst_addr = adev->gart.table_addr;
+ dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes);
@@ -2002,7 +2041,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
if (r)
goto error_free;
- r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
goto error_free;
@@ -2047,7 +2086,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
if (r)
return r;
- job->vm_needs_flush = vm_needs_flush;
+ if (vm_needs_flush) {
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
+ job->vm_needs_flush = true;
+ }
if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED,
@@ -2071,24 +2113,19 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
- if (direct_submit) {
- r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
- NULL, fence);
- job->fence = dma_fence_get(*fence);
- if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ if (direct_submit)
+ r = amdgpu_job_submit_direct(job, ring, fence);
+ else
+ r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
- if (r)
- goto error_free;
- }
+ if (r)
+ goto error_free;
return r;
error_free:
amdgpu_job_free(job);
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
return r;
}
@@ -2171,7 +2208,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
- r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
goto error_free;
@@ -2188,7 +2225,7 @@ error_free:
static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
- unsigned ttm_pl = *(int *)node->info_ent->data;
+ unsigned ttm_pl = (uintptr_t)node->info_ent->data;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl];
@@ -2198,12 +2235,12 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
return 0;
}
-static int ttm_pl_vram = TTM_PL_VRAM;
-static int ttm_pl_tt = TTM_PL_TT;
-
static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
- {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
- {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt},
+ {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM},
+ {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT},
+ {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS},
+ {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS},
+ {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA},
{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
#ifdef CONFIG_SWIOTLB
{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e5da4654b630..fe8f276e9811 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -73,7 +73,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem);
uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
-u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo);
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
@@ -116,6 +116,7 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
+uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index f55f72a37ca8..7b33867036e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -277,6 +277,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_PITCAIRN:
case CHIP_VERDE:
case CHIP_OLAND:
+ case CHIP_HAINAN:
return AMDGPU_FW_LOAD_DIRECT;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
@@ -296,19 +297,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
- if (!load_type)
- return AMDGPU_FW_LOAD_DIRECT;
- else
- return AMDGPU_FW_LOAD_SMU;
+ return AMDGPU_FW_LOAD_SMU;
case CHIP_VEGA10:
case CHIP_RAVEN:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
- case CHIP_VEGA20:
- return AMDGPU_FW_LOAD_DIRECT;
default:
DRM_ERROR("Unknown firmware load type\n");
}
@@ -322,6 +319,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
{
const struct common_firmware_header *header = NULL;
const struct gfx_firmware_header_v1_0 *cp_hdr = NULL;
+ const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL;
if (NULL == ucode->fw)
return 0;
@@ -333,8 +331,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
return 0;
header = (const struct common_firmware_header *)ucode->fw->data;
-
cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
+ dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP ||
(ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
@@ -343,7 +341,9 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
- ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
+ ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM &&
+ ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) {
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -365,6 +365,20 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
le32_to_cpu(header->ucode_array_offset_bytes) +
le32_to_cpu(cp_hdr->jt_offset) * 4),
ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_ERAM) {
+ ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -
+ le32_to_cpu(dmcu_hdr->intv_size_bytes);
+
+ memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes)),
+ ucode->ucode_size);
+ } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_INTV) {
+ ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes);
+
+ memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
+ le32_to_cpu(header->ucode_array_offset_bytes) +
+ le32_to_cpu(dmcu_hdr->intv_offset_bytes)),
+ ucode->ucode_size);
} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
@@ -406,32 +420,41 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
return 0;
}
-int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev)
{
- uint64_t fw_offset = 0;
- int i, err;
- struct amdgpu_firmware_info *ucode = NULL;
- const struct common_firmware_header *header = NULL;
-
- if (!adev->firmware.fw_size) {
- dev_warn(adev->dev, "No ip firmware need to load\n");
- return 0;
- }
-
- if (!adev->in_gpu_reset) {
- err = amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
- amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
- &adev->firmware.fw_buf,
- &adev->firmware.fw_buf_mc,
- &adev->firmware.fw_buf_ptr);
- if (err) {
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) {
+ amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE,
+ amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+ &adev->firmware.fw_buf,
+ &adev->firmware.fw_buf_mc,
+ &adev->firmware.fw_buf_ptr);
+ if (!adev->firmware.fw_buf) {
dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n");
- goto failed;
+ return -ENOMEM;
+ } else if (amdgpu_sriov_vf(adev)) {
+ memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
}
}
+ return 0;
+}
- memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev)
+{
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT)
+ amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
+ &adev->firmware.fw_buf_mc,
+ &adev->firmware.fw_buf_ptr);
+}
+
+int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
+{
+ uint64_t fw_offset = 0;
+ int i;
+ struct amdgpu_firmware_info *ucode = NULL;
+ /* For bare-metal, the ucode is allocated in GTT, so there is no need to refill the bo on reset/suspend */
+ if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend))
+ return 0;
/*
* if SMU loaded firmware, it needn't add SMC, UVD, and VCE
* ucode info here
@@ -448,7 +471,6 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
for (i = 0; i < adev->firmware.max_ucodes; i++) {
ucode = &adev->firmware.ucode[i];
if (ucode->fw) {
- header = (const struct common_firmware_header *)ucode->fw->data;
amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
adev->firmware.fw_buf_ptr + fw_offset);
if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
@@ -463,33 +485,4 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
}
}
return 0;
-
-failed:
- if (err)
- adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
-
- return err;
-}
-
-int amdgpu_ucode_fini_bo(struct amdgpu_device *adev)
-{
- int i;
- struct amdgpu_firmware_info *ucode = NULL;
-
- if (!adev->firmware.fw_size)
- return 0;
-
- for (i = 0; i < adev->firmware.max_ucodes; i++) {
- ucode = &adev->firmware.ucode[i];
- if (ucode->fw) {
- ucode->mc_addr = 0;
- ucode->kaddr = NULL;
- }
- }
-
- amdgpu_bo_free_kernel(&adev->firmware.fw_buf,
- &adev->firmware.fw_buf_mc,
- &adev->firmware.fw_buf_ptr);
-
- return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 08e38579af24..aa6641b944a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -157,6 +157,13 @@ struct gpu_info_firmware_header_v1_0 {
uint16_t version_minor; /* version */
};
+/* version_major=1, version_minor=0 */
+struct dmcu_firmware_header_v1_0 {
+ struct common_firmware_header header;
+ uint32_t intv_offset_bytes; /* interrupt vectors offset from end of header, in bytes */
+ uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */
+};
+
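The two new header fields let the driver carve one firmware blob into an ERAM image and an interrupt-vector image: the ERAM portion is the ucode payload minus intv_size_bytes, and the vectors start intv_offset_bytes into the payload. A small sketch with sample sizes; the struct below folds in the relevant common-header fields for brevity and is not the real layout.

#include <stdio.h>
#include <stdint.h>

/* Simplified view of the DMCU header fields used for the split. */
struct dmcu_hdr {
    uint32_t ucode_size_bytes;     /* from the common header */
    uint32_t ucode_array_offset;   /* from the common header */
    uint32_t intv_offset_bytes;
    uint32_t intv_size_bytes;
};

int main(void)
{
    struct dmcu_hdr h = {
        .ucode_size_bytes = 0x10000,   /* sample values only */
        .ucode_array_offset = 0x100,
        .intv_offset_bytes = 0xc000,
        .intv_size_bytes = 0x4000,
    };
    uint32_t eram_size = h.ucode_size_bytes - h.intv_size_bytes;
    uint32_t eram_off  = h.ucode_array_offset;
    uint32_t intv_off  = h.ucode_array_offset + h.intv_offset_bytes;

    printf("ERAM: offset 0x%x, size 0x%x\n", eram_off, eram_size);
    printf("INTV: offset 0x%x, size 0x%x\n", intv_off, h.intv_size_bytes);
    return 0;
}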
/* header is fixed size */
union amdgpu_firmware_header {
struct common_firmware_header common;
@@ -170,6 +177,7 @@ union amdgpu_firmware_header {
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct gpu_info_firmware_header_v1_0 gpu_info;
+ struct dmcu_firmware_header_v1_0 dmcu;
uint8_t raw[0x100];
};
@@ -193,7 +201,11 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
+ AMDGPU_UCODE_ID_UVD1,
AMDGPU_UCODE_ID_VCE,
+ AMDGPU_UCODE_ID_VCN,
+ AMDGPU_UCODE_ID_DMCU_ERAM,
+ AMDGPU_UCODE_ID_DMCU_INTV,
AMDGPU_UCODE_ID_MAXIMUM,
};
@@ -204,6 +216,12 @@ enum AMDGPU_UCODE_STATUS {
AMDGPU_UCODE_STATUS_LOADED,
};
+enum amdgpu_firmware_load_type {
+ AMDGPU_FW_LOAD_DIRECT = 0,
+ AMDGPU_FW_LOAD_SMU,
+ AMDGPU_FW_LOAD_PSP,
+};
+
/* conform to smu_ucode_xfer_cz.h */
#define AMDGPU_SDMA0_UCODE_LOADED 0x00000001
#define AMDGPU_SDMA1_UCODE_LOADED 0x00000002
@@ -226,6 +244,27 @@ struct amdgpu_firmware_info {
void *kaddr;
/* ucode_size_bytes */
uint32_t ucode_size;
+ /* starting tmr mc address */
+ uint32_t tmr_mc_addr_lo;
+ uint32_t tmr_mc_addr_hi;
+};
+
+struct amdgpu_firmware {
+ struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
+ enum amdgpu_firmware_load_type load_type;
+ struct amdgpu_bo *fw_buf;
+ unsigned int fw_size;
+ unsigned int max_ucodes;
+ /* firmware is loaded by psp instead of smu from vega10 onwards */
+ const struct amdgpu_psp_funcs *funcs;
+ struct amdgpu_bo *rbuf;
+ struct mutex mutex;
+
+ /* gpu info firmware data pointer */
+ const struct firmware *gpu_info_fw;
+
+ void *fw_buf_ptr;
+ uint64_t fw_buf_mc;
};
void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
@@ -237,8 +276,10 @@ void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr);
int amdgpu_ucode_validate(const struct firmware *fw);
bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
uint16_t hdr_major, uint16_t hdr_minor);
+
int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
-int amdgpu_ucode_fini_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
+void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
enum amdgpu_firmware_load_type
amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 3ff08e326838..e5a6db6beab7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -53,11 +53,11 @@
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
-#define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin"
-#define FIRMWARE_KABINI "radeon/kabini_uvd.bin"
-#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin"
-#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin"
-#define FIRMWARE_MULLINS "radeon/mullins_uvd.bin"
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin"
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
@@ -122,12 +122,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
- unsigned version_major, version_minor, family_id;
+ unsigned family_id;
int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@@ -208,29 +206,46 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
- version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
- version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
- DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
- version_major, version_minor, family_id);
-
- /*
- * Limit the number of UVD handles depending on microcode major
- * and minor versions. The firmware version which has 40 UVD
- * instances support is 1.80. So all subsequent versions should
- * also have the same support.
- */
- if ((version_major > 0x01) ||
- ((version_major == 0x01) && (version_minor >= 0x50)))
- adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
- adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
- (family_id << 8));
+ if (adev->asic_type < CHIP_VEGA20) {
+ unsigned version_major, version_minor;
- if ((adev->asic_type == CHIP_POLARIS10 ||
- adev->asic_type == CHIP_POLARIS11) &&
- (adev->uvd.fw_version < FW_1_66_16))
- DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
- version_major, version_minor);
+ version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
+ version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
+ version_major, version_minor, family_id);
+
+ /*
+ * Limit the number of UVD handles depending on microcode major
+ * and minor versions. The firmware version which has 40 UVD
+ * instances support is 1.80. So all subsequent versions should
+ * also have the same support.
+ */
+ if ((version_major > 0x01) ||
+ ((version_major == 0x01) && (version_minor >= 0x50)))
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
+ (family_id << 8));
+
+ if ((adev->asic_type == CHIP_POLARIS10 ||
+ adev->asic_type == CHIP_POLARIS11) &&
+ (adev->uvd.fw_version < FW_1_66_16))
+ DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
+ version_major, version_minor);
+ } else {
+ unsigned int enc_major, enc_minor, dec_minor;
+
+ dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
+ enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
+ enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
+ DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n",
+ enc_major, enc_minor, dec_minor, family_id);
+
+ adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
+
+ adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
+ }
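For Vega20 and later the version word is packed differently: the family id sits in the low byte, the decoder minor in the next byte, the encoder minor in bits 24-29 and the encoder major in the top two bits. A stand-alone decode of a made-up sample value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t v = 0x8414080c;   /* invented ucode_version sample */

    unsigned family_id = v & 0xff;
    unsigned dec_minor = (v >> 8) & 0xff;
    unsigned enc_minor = (v >> 24) & 0x3f;
    unsigned enc_major = (v >> 30) & 0x3;

    printf("ENC: %u.%u DEC: .%u Family ID: %u\n",
           enc_major, enc_minor, dec_minor, family_id);
    return 0;
}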
bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
@@ -238,7 +253,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
-
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
&adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
@@ -246,21 +262,13 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
}
+ }
- ring = &adev->uvd.inst[j].ring;
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
- rq, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
- return r;
- }
-
- for (i = 0; i < adev->uvd.max_handles; ++i) {
- atomic_set(&adev->uvd.inst[j].handles[i], 0);
- adev->uvd.inst[j].filp[i] = NULL;
- }
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ atomic_set(&adev->uvd.handles[i], 0);
+ adev->uvd.filp[i] = NULL;
}
+
/* from uvd v5.0 HW addressing capacity increased to 64 bits */
if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
adev->uvd.address_64_bit = true;
@@ -289,10 +297,12 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
int i, j;
- for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
- kfree(adev->uvd.inst[j].saved_bo);
+ drm_sched_entity_destroy(&adev->uvd.entity);
- drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
+ for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
+ kvfree(adev->uvd.inst[j].saved_bo);
amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
&adev->uvd.inst[j].gpu_addr,
@@ -308,6 +318,29 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_uvd_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ struct drm_sched_rq *rq;
+ int r;
+
+ ring = &adev->uvd.inst[0].ring;
+ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+ r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
+ if (r) {
+ DRM_ERROR("Failed setting up UVD kernel entity.\n");
+ return r;
+ }
+
+ return 0;
+}
+
int amdgpu_uvd_suspend(struct amdgpu_device *adev)
{
unsigned size;
@@ -316,24 +349,26 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
cancel_delayed_work_sync(&adev->uvd.idle_work);
+ /* only valid for physical mode */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ if (atomic_read(&adev->uvd.handles[i]))
+ break;
+
+ if (i == adev->uvd.max_handles)
+ return 0;
+ }
+
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
if (adev->uvd.inst[j].vcpu_bo == NULL)
continue;
- /* only valid for physical mode */
- if (adev->asic_type < CHIP_POLARIS10) {
- for (i = 0; i < adev->uvd.max_handles; ++i)
- if (atomic_read(&adev->uvd.inst[j].handles[i]))
- break;
-
- if (i == adev->uvd.max_handles)
- continue;
- }
-
size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
ptr = adev->uvd.inst[j].cpu_addr;
- adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
+ adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
if (!adev->uvd.inst[j].saved_bo)
return -ENOMEM;
@@ -349,6 +384,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
if (adev->uvd.inst[i].vcpu_bo == NULL)
return -EINVAL;
@@ -357,7 +394,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
if (adev->uvd.inst[i].saved_bo != NULL) {
memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
- kfree(adev->uvd.inst[i].saved_bo);
+ kvfree(adev->uvd.inst[i].saved_bo);
adev->uvd.inst[i].saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
@@ -381,30 +418,27 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
- struct amdgpu_ring *ring;
- int i, j, r;
-
- for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
- ring = &adev->uvd.inst[j].ring;
+ struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
+ int i, r;
- for (i = 0; i < adev->uvd.max_handles; ++i) {
- uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
- if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
- struct dma_fence *fence;
-
- r = amdgpu_uvd_get_destroy_msg(ring, handle,
- false, &fence);
- if (r) {
- DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
- continue;
- }
+ for (i = 0; i < adev->uvd.max_handles; ++i) {
+ uint32_t handle = atomic_read(&adev->uvd.handles[i]);
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
+ if (handle != 0 && adev->uvd.filp[i] == filp) {
+ struct dma_fence *fence;
- adev->uvd.inst[j].filp[i] = NULL;
- atomic_set(&adev->uvd.inst[j].handles[i], 0);
+ r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
+ &fence);
+ if (r) {
+ DRM_ERROR("Error destroying UVD %d!\n", r);
+ continue;
}
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+ adev->uvd.filp[i] = NULL;
+ atomic_set(&adev->uvd.handles[i], 0);
}
}
}
@@ -459,7 +493,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
if (cmd == 0x0 || cmd == 0x3) {
/* yes, force it into VRAM */
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
- amdgpu_ttm_placement_from_domain(bo, domain);
+ amdgpu_bo_placement_from_domain(bo, domain);
}
amdgpu_uvd_force_into_uvd_segment(bo);
@@ -679,16 +713,15 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
void *ptr;
long r;
int i;
- uint32_t ip_instance = ctx->parser->job->ring->me;
if (offset & 0x3F) {
- DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
+ DRM_ERROR("UVD messages must be 64 byte aligned!\n");
return -EINVAL;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
- DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
+ DRM_ERROR("Failed mapping the UVD) message (%ld)!\n", r);
return r;
}
@@ -698,7 +731,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
handle = msg[2];
if (handle == 0) {
- DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
+ DRM_ERROR("Invalid UVD handle!\n");
return -EINVAL;
}
@@ -709,18 +742,19 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* try to alloc a new handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
- if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
- DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+ DRM_ERROR(")Handle 0x%x already in use!\n",
+ handle);
return -EINVAL;
}
- if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
- adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
+ if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
+ adev->uvd.filp[i] = ctx->parser->filp;
return 0;
}
}
- DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
+ DRM_ERROR("No more free UVD handles!\n");
return -ENOSPC;
case 1:
@@ -732,27 +766,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* validate the handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
- if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
- if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
- DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
+ if (atomic_read(&adev->uvd.handles[i]) == handle) {
+ if (adev->uvd.filp[i] != ctx->parser->filp) {
+ DRM_ERROR("UVD handle collision detected!\n");
return -EINVAL;
}
return 0;
}
}
- DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
+ DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
return -ENOENT;
case 2:
/* it's a destroy msg, free the handle */
for (i = 0; i < adev->uvd.max_handles; ++i)
- atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
+ atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
amdgpu_bo_kunmap(bo);
return 0;
default:
- DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
+ DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
return -EINVAL;
}
BUG();
@@ -1000,7 +1034,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (!ring->adev->uvd.address_64_bit) {
struct ttm_operation_ctx ctx = { true, false };
- amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+ amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_uvd_force_into_uvd_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
@@ -1045,19 +1079,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r < 0)
goto err_free;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
-
- amdgpu_job_free(job);
} else {
r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
if (r)
goto err_free;
- r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
+ r = amdgpu_job_submit(job, &adev->uvd.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err_free;
@@ -1149,6 +1180,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
unsigned fences = 0, i, j;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
@@ -1259,7 +1292,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
* necessarily linear. So we need to count
* all non-zero handles.
*/
- if (atomic_read(&adev->uvd.inst->handles[i]))
+ if (atomic_read(&adev->uvd.handles[i]))
used_handles++;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 8b23a1b00c76..a3ab1a41060f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -42,30 +42,34 @@ struct amdgpu_uvd_inst {
void *cpu_addr;
uint64_t gpu_addr;
void *saved_bo;
- atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
- struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
struct amdgpu_ring ring;
struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
struct amdgpu_irq_src irq;
- struct drm_sched_entity entity;
- struct drm_sched_entity entity_enc;
uint32_t srbm_soft_reset;
};
+#define AMDGPU_UVD_HARVEST_UVD0 (1 << 0)
+#define AMDGPU_UVD_HARVEST_UVD1 (1 << 1)
+
struct amdgpu_uvd {
const struct firmware *fw; /* UVD firmware */
unsigned fw_version;
unsigned max_handles;
unsigned num_enc_rings;
- uint8_t num_uvd_inst;
+ uint8_t num_uvd_inst;
bool address_64_bit;
bool use_ctx_buf;
- struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
+ struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
+ struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
+ atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
+ struct drm_sched_entity entity;
struct delayed_work idle_work;
+ unsigned harvest_config;
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev);
int amdgpu_uvd_suspend(struct amdgpu_device *adev);
int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 23d960ec1cf2..5f3f54073818 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -40,11 +40,11 @@
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
-#define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin"
-#define FIRMWARE_KABINI "radeon/kabini_vce.bin"
-#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin"
-#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin"
-#define FIRMWARE_MULLINS "radeon/mullins_vce.bin"
+#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
+#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
+#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin"
+#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin"
+#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
@@ -90,8 +90,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work);
*/
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
- struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned ucode_version, version_major, version_minor, binary_id;
@@ -188,15 +186,6 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
return r;
}
- ring = &adev->vce.ring[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
- rq, NULL);
- if (r != 0) {
- DRM_ERROR("Failed setting up VCE run queue.\n");
- return r;
- }
-
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
atomic_set(&adev->vce.handles[i], 0);
adev->vce.filp[i] = NULL;
@@ -222,7 +211,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
if (adev->vce.vcpu_bo == NULL)
return 0;
- drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
+ drm_sched_entity_destroy(&adev->vce.entity);
amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
(void **)&adev->vce.cpu_addr);
@@ -237,6 +226,29 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
}
/**
+ * amdgpu_vce_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_vce_entity_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ struct drm_sched_rq *rq;
+ int r;
+
+ ring = &adev->vce.ring[0];
+ rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+ r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up VCE run queue.\n");
+ return r;
+ }
+
+ return 0;
+}
+
+/**
* amdgpu_vce_suspend - unpin VCE fw memory
*
* @adev: amdgpu_device pointer
@@ -246,6 +258,8 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
{
int i;
+ cancel_delayed_work_sync(&adev->vce.idle_work);
+
if (adev->vce.vcpu_bo == NULL)
return 0;
@@ -256,7 +270,6 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
if (i == AMDGPU_MAX_VCE_HANDLES)
return 0;
- cancel_delayed_work_sync(&adev->vce.idle_work);
/* TODO: suspending running encoding sessions isn't supported */
return -EINVAL;
}
@@ -470,12 +483,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -532,19 +543,13 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
- if (r)
- goto err;
-
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
+ if (direct)
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ else
+ r = amdgpu_job_submit(job, &ring->adev->vce.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err;
- }
+ if (r)
+ goto err;
if (fence)
*fence = dma_fence_get(f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 71781267ee4c..a1f209eed4c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -55,6 +55,7 @@ struct amdgpu_vce {
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
+int amdgpu_vce_entity_init(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 1b4ad9b2a755..27da13df2f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -36,14 +36,19 @@
#include "soc15_common.h"
#include "vcn/vcn_1_0_offset.h"
+#include "vcn/vcn_1_0_sh_mask.h"
/* 1 second timeout */
#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000)
/* Firmware Names */
#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin"
+#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin"
+#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
+MODULE_FIRMWARE(FIRMWARE_PICASSO);
+MODULE_FIRMWARE(FIRMWARE_RAVEN2);
static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
@@ -59,7 +64,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_RAVEN:
- fw_name = FIRMWARE_RAVEN;
+ if (adev->rev_id >= 8)
+ fw_name = FIRMWARE_RAVEN2;
+ else if (adev->pdev->device == 0x15d8)
+ fw_name = FIRMWARE_PICASSO;
+ else
+ fw_name = FIRMWARE_RAVEN;
break;
default:
return -EINVAL;
@@ -111,9 +121,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
version_major, version_minor, family_id);
}
- bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
- + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
- + AMDGPU_VCN_SESSION_SIZE * 40;
+ bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
&adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
@@ -129,7 +139,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
int i;
- kfree(adev->vcn.saved_bo);
+ kvfree(adev->vcn.saved_bo);
amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
&adev->vcn.gpu_addr,
@@ -140,6 +150,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
+ amdgpu_ring_fini(&adev->vcn.ring_jpeg);
+
release_firmware(adev->vcn.fw);
return 0;
@@ -150,15 +162,15 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
unsigned size;
void *ptr;
+ cancel_delayed_work_sync(&adev->vcn.idle_work);
+
if (adev->vcn.vcpu_bo == NULL)
return 0;
- cancel_delayed_work_sync(&adev->vcn.idle_work);
-
size = amdgpu_bo_size(adev->vcn.vcpu_bo);
ptr = adev->vcn.cpu_addr;
- adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
+ adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);
if (!adev->vcn.saved_bo)
return -ENOMEM;
@@ -180,36 +192,184 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
if (adev->vcn.saved_bo != NULL) {
memcpy_toio(ptr, adev->vcn.saved_bo, size);
- kfree(adev->vcn.saved_bo);
+ kvfree(adev->vcn.saved_bo);
adev->vcn.saved_bo = NULL;
} else {
const struct common_firmware_header *hdr;
unsigned offset;
hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
- offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
- memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
- le32_to_cpu(hdr->ucode_size_bytes));
- size -= le32_to_cpu(hdr->ucode_size_bytes);
- ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+ offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+ memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
+ le32_to_cpu(hdr->ucode_size_bytes));
+ size -= le32_to_cpu(hdr->ucode_size_bytes);
+ ptr += le32_to_cpu(hdr->ucode_size_bytes);
+ }
memset_io(ptr, 0, size);
}
return 0;
}
+static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
+ struct dpg_pause_state *new_state)
+{
+ int ret_code;
+ uint32_t reg_data = 0;
+ uint32_t reg_data2 = 0;
+ struct amdgpu_ring *ring;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* pause DPG non-jpeg */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.ring_enc[0];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_enc[1];
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg non-jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.fw_based = new_state->fw_based;
+ }
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
+ DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+ adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+ new_state->fw_based, new_state->jpeg);
+
+ reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+
+ if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* Make sure JPRG Snoop is disabled before sending the pause */
+ reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
+
+ /* pause DPG jpeg */
+ reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.ring_jpeg;
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+
+ ring = &adev->vcn.ring_dec;
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg jpeg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.jpeg = new_state->jpeg;
+ }
+
+ return 0;
+}
+
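amdgpu_vcn_pause_dpg_mode() above leans on SOC15_WAIT_ON_RREG() for the power-status and pause-ack handshakes. A minimal open-coded equivalent of one such wait, assuming the macro is a bounded register poll; the real macro in soc15_common.h may use different iteration counts and delays:

static int sketch_wait_nj_pause_ack(struct amdgpu_device *adev)
{
        unsigned int loop;

        /* poll until the non-JPEG pause request is acknowledged */
        for (loop = 0; loop < adev->usec_timeout; ++loop) {
                uint32_t tmp = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE);

                if (tmp & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)
                        return 0;
                udelay(1);
        }
        return -ETIMEDOUT;
}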
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, vcn.idle_work.work);
- unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
- unsigned i;
+ unsigned int fences = 0;
+ unsigned int i;
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
}
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+
+ if (fences)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+
+ if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+ amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+ }
+
+ fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
+ fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
+
if (fences == 0) {
+ amdgpu_gfx_off_ctrl(adev, true);
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
else
@@ -225,13 +385,30 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
- if (set_clocks && adev->pm.dpm_enabled) {
+ if (set_clocks) {
+ amdgpu_gfx_off_ctrl(adev, false);
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true);
else
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
}
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ struct dpg_pause_state new_state;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ new_state.fw_based = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.fw_based = adev->vcn.pause_state.fw_based;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+ new_state.jpeg = VCN_DPG_STATE__PAUSE;
+ else
+ new_state.jpeg = adev->vcn.pause_state.jpeg;
+
+ amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+ }
}
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
@@ -246,7 +423,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
+ WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
@@ -254,11 +431,11 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
+ tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
@@ -304,13 +481,10 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
}
ib->length_dw = 16;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
- amdgpu_job_free(job);
-
amdgpu_bo_fence(bo, f, false);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&bo);
@@ -495,12 +669,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -549,12 +721,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -597,3 +767,127 @@ error:
dma_fence_put(fence);
return r;
}
+
+int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ int r;
+
+ WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD);
+ r = amdgpu_ring_alloc(ring, 3);
+
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
+ ring->idx, r);
+ return r;
+ }
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0));
+ amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_commit(ring);
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i < adev->usec_timeout) {
+ DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
+ ring->idx, i);
+ } else {
+ DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
+ ring->idx, tmp);
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
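The new JPEG ring test above (and the IB test further down) only become reachable once they are plugged into the JPEG ring's function table. A hedged sketch of that wiring; .test_ring/.test_ib are existing amdgpu_ring_funcs hooks, but this initializer is illustrative and not part of this patch:

static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_funcs_sketch = {
        .type      = AMDGPU_RING_TYPE_VCN_JPEG,
        .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
        .test_ib   = amdgpu_vcn_jpeg_ring_test_ib,
        /* emit_ib, emit_fence, get/set wptr etc. omitted in this sketch */
};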
+static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct dma_fence *f = NULL;
+ const unsigned ib_size_dw = 16;
+ int i, r;
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ if (r)
+ return r;
+
+ ib = &job->ibs[0];
+
+ ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0);
+ ib->ptr[1] = 0xDEADBEEF;
+ for (i = 2; i < 16; i += 2) {
+ ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ib->ptr[i+1] = 0;
+ }
+ ib->length_dw = 16;
+
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
+
+ if (fence)
+ *fence = dma_fence_get(f);
+ dma_fence_put(f);
+
+ return 0;
+
+err:
+ amdgpu_job_free(job);
+ return r;
+}
+
+int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t tmp = 0;
+ unsigned i;
+ struct dma_fence *fence = NULL;
+ long r = 0;
+
+ r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
+ goto error;
+ }
+
+ r = dma_fence_wait_timeout(fence, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out.\n");
+ r = -ETIMEDOUT;
+ goto error;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto error;
+ } else
+ r = 0;
+
+ for (i = 0; i < adev->usec_timeout; i++) {
+ tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
+ if (tmp == 0xDEADBEEF)
+ break;
+ DRM_UDELAY(1);
+ }
+
+ if (i < adev->usec_timeout)
+ DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ else {
+ DRM_ERROR("ib test failed (0x%08X)\n", tmp);
+ r = -EINVAL;
+ }
+
+ dma_fence_put(fence);
+
+error:
+ return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 773010b9ff15..a0ad19af9080 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -24,9 +24,9 @@
#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__
-#define AMDGPU_VCN_STACK_SIZE (200*1024)
-#define AMDGPU_VCN_HEAP_SIZE (256*1024)
-#define AMDGPU_VCN_SESSION_SIZE (50*1024)
+#define AMDGPU_VCN_STACK_SIZE (128*1024)
+#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
+
#define AMDGPU_VCN_FIRMWARE_OFFSET 256
#define AMDGPU_VCN_MAX_ENC_RINGS 3
@@ -56,6 +56,16 @@ enum engine_status_constants {
UVD_STATUS__RBC_BUSY = 0x1,
};
+enum internal_dpg_state {
+ VCN_DPG_STATE__UNPAUSE = 0,
+ VCN_DPG_STATE__PAUSE,
+};
+
+struct dpg_pause_state {
+ enum internal_dpg_state fw_based;
+ enum internal_dpg_state jpeg;
+};
+
struct amdgpu_vcn {
struct amdgpu_bo *vcpu_bo;
void *cpu_addr;
@@ -66,8 +76,11 @@ struct amdgpu_vcn {
const struct firmware *fw; /* VCN firmware */
struct amdgpu_ring ring_dec;
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
+ struct amdgpu_ring ring_jpeg;
struct amdgpu_irq_src irq;
unsigned num_enc_rings;
+ enum amd_powergating_state cur_state;
+ struct dpg_pause_state pause_state;
};
int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
@@ -83,4 +96,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring);
+int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 21adb1b6e5cb..f2f358aa0597 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,18 +22,13 @@
*/
#include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
addr -= AMDGPU_VA_RESERVED_SIZE;
-
- if (addr >= AMDGPU_VA_HOLE_START)
- addr |= AMDGPU_VA_HOLE_END;
+ addr = amdgpu_gmc_sign_extend(addr);
return addr;
}
@@ -76,7 +71,7 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va)
{
- uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fdcb498f6d19..6904d794d60a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -33,9 +33,11 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_gmc.h"
-/*
- * GPUVM
+/**
+ * DOC: GPUVM
+ *
* GPUVM is similar to the legacy gart on older asics, however
* rather than there being a single global gart table
* for the entire GPU, there are multiple VM page tables active
@@ -63,74 +65,82 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
#undef START
#undef LAST
-/* Local structure. Encapsulate some VM table update parameters to reduce
+/**
+ * struct amdgpu_pte_update_params - Local structure
+ *
+ * Encapsulate some VM table update parameters to reduce
* the number of function parameters
+ *
*/
struct amdgpu_pte_update_params {
- /* amdgpu device we do this update for */
+
+ /**
+ * @adev: amdgpu device we do this update for
+ */
struct amdgpu_device *adev;
- /* optional amdgpu_vm we do this update for */
+
+ /**
+ * @vm: optional amdgpu_vm we do this update for
+ */
struct amdgpu_vm *vm;
- /* address where to copy page table entries from */
+
+ /**
+ * @src: address where to copy page table entries from
+ */
uint64_t src;
- /* indirect buffer to fill with commands */
+
+ /**
+ * @ib: indirect buffer to fill with commands
+ */
struct amdgpu_ib *ib;
- /* Function which actually does the update */
+
+ /**
+ * @func: Function which actually does the update
+ */
void (*func)(struct amdgpu_pte_update_params *params,
struct amdgpu_bo *bo, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags);
- /* The next two are used during VM update by CPU
- * DMA addresses to use for mapping
- * Kernel pointer of PD/PT BO that needs to be updated
+ /**
+ * @pages_addr:
+ *
+ * DMA addresses to use for mapping, used during VM update by CPU
*/
dma_addr_t *pages_addr;
+
+ /**
+ * @kptr:
+ *
+ * Kernel pointer of PD/PT BO that needs to be updated,
+ * used during VM update by CPU
+ */
void *kptr;
};
-/* Helper to disable partial resident texture feature from a fence callback */
+/**
+ * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
+ */
struct amdgpu_prt_cb {
+
+ /**
+ * @adev: amdgpu device
+ */
struct amdgpu_device *adev;
+
+ /**
+ * @cb: callback
+ */
struct dma_fence_cb cb;
};
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo)
-{
- base->vm = vm;
- base->bo = bo;
- INIT_LIST_HEAD(&base->bo_list);
- INIT_LIST_HEAD(&base->vm_status);
-
- if (!bo)
- return;
- list_add_tail(&base->bo_list, &bo->va);
-
- if (bo->tbo.type == ttm_bo_type_kernel)
- list_move(&base->vm_status, &vm->relocated);
-
- if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
- return;
-
- if (bo->preferred_domains &
- amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
- return;
-
- /*
- * we checked all the prerequisites, but it looks like this per vm bo
- * is currently evicted. add the bo to the evicted list to make sure it
- * is validated on next vm use to avoid fault.
- * */
- list_move_tail(&base->vm_status, &vm->evicted);
-}
-
/**
* amdgpu_vm_level_shift - return the addr shift for each level
*
* @adev: amdgpu_device pointer
+ * @level: VMPT level
*
- * Returns the number of bits the pfn needs to be right shifted for a level.
+ * Returns:
+ * The number of bits the pfn needs to be right shifted for a level.
*/
static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
unsigned level)
@@ -158,8 +168,10 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
* amdgpu_vm_num_entries - return the number of entries in a PD/PT
*
* @adev: amdgpu_device pointer
+ * @level: VMPT level
*
- * Calculate the number of entries in a page directory or page table.
+ * Returns:
+ * The number of entries in a page directory or page table.
*/
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
unsigned level)
@@ -179,11 +191,33 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
}
/**
+ * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The mask to extract the entry number of a PD/PT from an address.
+ */
+static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ if (level <= adev->vm_manager.root_level)
+ return 0xffffffff;
+ else if (level != AMDGPU_VM_PTB)
+ return 0x1ff;
+ else
+ return AMDGPU_VM_PTE_COUNT(adev) - 1;
+}
+
+/**
* amdgpu_vm_bo_size - returns the size of the BOs in bytes
*
* @adev: amdgpu_device pointer
+ * @level: VMPT level
*
- * Calculate the size of the BO for a page directory or page table in bytes.
+ * Returns:
+ * The size of the BO for a page directory or page table in bytes.
*/
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
@@ -191,6 +225,382 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
}
/**
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
+ *
+ * @vm_bo: vm_bo which is evicted
+ *
+ * State for PDs/PTs and per VM BOs which are not at the location they should
+ * be.
+ */
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
+{
+ struct amdgpu_vm *vm = vm_bo->vm;
+ struct amdgpu_bo *bo = vm_bo->bo;
+
+ vm_bo->moved = true;
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ list_move(&vm_bo->vm_status, &vm->evicted);
+ else
+ list_move_tail(&vm_bo->vm_status, &vm->evicted);
+}
+
+/**
+ * amdgpu_vm_bo_relocated - vm_bo is relocated
+ *
+ * @vm_bo: vm_bo which is relocated
+ *
+ * State for PDs/PTs which needs to update their parent PD.
+ */
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
+{
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
+}
+
+/**
+ * amdgpu_vm_bo_moved - vm_bo is moved
+ *
+ * @vm_bo: vm_bo which is moved
+ *
+ * State for per VM BOs which are moved, but that change is not yet reflected
+ * in the page tables.
+ */
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
+{
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+}
+
+/**
+ * amdgpu_vm_bo_idle - vm_bo is idle
+ *
+ * @vm_bo: vm_bo which is now idle
+ *
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
+ * and are now idle.
+ */
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
+{
+ list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ vm_bo->moved = false;
+}
+
+/**
+ * amdgpu_vm_bo_invalidated - vm_bo is invalidated
+ *
+ * @vm_bo: vm_bo which is now invalidated
+ *
+ * State for normal BOs which are invalidated and whose change is not yet
+ * reflected in the PTs.
+ */
+static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
+{
+ spin_lock(&vm_bo->vm->invalidated_lock);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
+ spin_unlock(&vm_bo->vm->invalidated_lock);
+}
+
+/**
+ * amdgpu_vm_bo_done - vm_bo is done
+ *
+ * @vm_bo: vm_bo which is now done
+ *
+ * State for normal BOs which are invalidated and whose change has already
+ * been updated in the PTs.
+ */
+static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
+{
+ spin_lock(&vm_bo->vm->invalidated_lock);
+ list_del_init(&vm_bo->vm_status);
+ spin_unlock(&vm_bo->vm->invalidated_lock);
+}
+
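The six helpers above are the whole per-BO state machine for this patch. A rough transition summary, derived from their kernel-doc and from the callers later in this diff (amdgpu_vm_bo_base_init(), amdgpu_vm_validate_pt_bos(), amdgpu_vm_update_directories()); the moved/invalidated to done transitions happen in code outside this excerpt:

/*
 *   evicted     --validate-->    relocated    (page table BOs)
 *   evicted     --validate-->    moved        (per VM BOs)
 *   relocated   --PDE update-->  idle
 *   moved / invalidated  --PTE update-->  idle / done
 */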
+/**
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
+ *
+ * @base: base structure for tracking BO usage in a VM
+ * @vm: vm to which bo is to be added
+ * @bo: amdgpu buffer object
+ *
+ * Initialize a bo_va_base structure and add it to the appropriate lists
+ *
+ */
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo *bo)
+{
+ base->vm = vm;
+ base->bo = bo;
+ base->next = NULL;
+ INIT_LIST_HEAD(&base->vm_status);
+
+ if (!bo)
+ return;
+ base->next = bo->vm_bo;
+ bo->vm_bo = base;
+
+ if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+ return;
+
+ vm->bulk_moveable = false;
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ amdgpu_vm_bo_relocated(base);
+ else
+ amdgpu_vm_bo_idle(base);
+
+ if (bo->preferred_domains &
+ amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+ return;
+
+ /*
+ * we checked all the prerequisites, but it looks like this per vm bo
+ * is currently evicted. add the bo to the evicted list to make sure it
+ * is validated on next vm use to avoid fault.
+ * */
+ amdgpu_vm_bo_evicted(base);
+}
+
+/**
+ * amdgpu_vm_pt_parent - get the parent page directory
+ *
+ * @pt: child page table
+ *
+ * Helper to get the parent entry for the child page table. NULL if we are at
+ * the root page directory.
+ */
+static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
+{
+ struct amdgpu_bo *parent = pt->base.bo->parent;
+
+ if (!parent)
+ return NULL;
+
+ return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
+}
+
+/**
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
+ */
+struct amdgpu_vm_pt_cursor {
+ uint64_t pfn;
+ struct amdgpu_vm_pt *parent;
+ struct amdgpu_vm_pt *entry;
+ unsigned level;
+};
+
+/**
+ * amdgpu_vm_pt_start - start PD/PT walk
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start address of the walk
+ * @cursor: state to initialize
+ *
+ * Initialize a amdgpu_vm_pt_cursor to start a walk.
+ */
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, uint64_t start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ cursor->pfn = start;
+ cursor->parent = NULL;
+ cursor->entry = &vm->root;
+ cursor->level = adev->vm_manager.root_level;
+}
+
+/**
+ * amdgpu_vm_pt_descendant - go to child node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the child node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ unsigned mask, shift, idx;
+
+ if (!cursor->entry->entries)
+ return false;
+
+ BUG_ON(!cursor->entry->base.bo);
+ mask = amdgpu_vm_entries_mask(adev, cursor->level);
+ shift = amdgpu_vm_level_shift(adev, cursor->level);
+
+ ++cursor->level;
+ idx = (cursor->pfn >> shift) & mask;
+ cursor->parent = cursor->entry;
+ cursor->entry = &cursor->entry->entries[idx];
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_sibling - go to sibling node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the sibling node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ unsigned shift, num_entries;
+
+ /* Root doesn't have a sibling */
+ if (!cursor->parent)
+ return false;
+
+ /* Go to our parents and see if we got a sibling */
+ shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
+ num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);
+
+ if (cursor->entry == &cursor->parent->entries[num_entries - 1])
+ return false;
+
+ cursor->pfn += 1ULL << shift;
+ cursor->pfn &= ~((1ULL << shift) - 1);
+ ++cursor->entry;
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_ancestor - go to parent node
+ *
+ * @cursor: current state
+ *
+ * Walk to the parent node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->parent)
+ return false;
+
+ --cursor->level;
+ cursor->entry = cursor->parent;
+ cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in the hierarchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next node.
+ */
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ /* First try a newborn child */
+ if (amdgpu_vm_pt_descendant(adev, cursor))
+ return;
+
+ /* If that didn't work, try to find a sibling */
+ while (!amdgpu_vm_pt_sibling(adev, cursor)) {
+ /* No sibling, go to our parents and grandparents */
+ if (!amdgpu_vm_pt_ancestor(cursor)) {
+ cursor->pfn = ~0ll;
+ return;
+ }
+ }
+}
+
+/**
+ * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start addr of the walk
+ * @cursor: state to initialize
+ *
+ * Start a walk and go directly to the leaf node.
+ */
+static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, uint64_t start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ amdgpu_vm_pt_start(adev, vm, start, cursor);
+ while (amdgpu_vm_pt_descendant(adev, cursor));
+}
+
+/**
+ * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next leaf node.
+ */
+static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ amdgpu_vm_pt_next(adev, cursor);
+ while (amdgpu_vm_pt_descendant(adev, cursor));
+}
+
+/**
+ * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
+ */
+#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \
+ for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \
+ (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
+
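A hedged usage sketch of for_each_amdgpu_vm_pt_leaf(); the arguments mirror how amdgpu_vm_alloc_pts() uses it further down in this patch, while the wrapper function and loop body here are purely illustrative:

static unsigned int sketch_count_missing_leaves(struct amdgpu_device *adev,
                                                struct amdgpu_vm *vm,
                                                uint64_t saddr, uint64_t eaddr)
{
        struct amdgpu_vm_pt_cursor cursor;
        unsigned int missing = 0;

        /* saddr/eaddr are page-frame numbers, eaddr inclusive, as in alloc_pts */
        for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
                /* cursor.entry is the leaf PD/PT covering cursor.pfn at cursor.level */
                if (!cursor.entry->base.bo)
                        missing++;
        }
        return missing;
}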
+/**
+ * amdgpu_vm_pt_first_dfs - start a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @cursor: state to initialize
+ *
+ * Starts a depth-first traversal of the PD/PT tree.
+ */
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ amdgpu_vm_pt_start(adev, vm, 0, cursor);
+ while (amdgpu_vm_pt_descendant(adev, cursor));
+}
+
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move the cursor to the next node in a deep first search.
+ */
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->entry)
+ return;
+
+ if (!cursor->parent)
+ cursor->entry = NULL;
+ else if (amdgpu_vm_pt_sibling(adev, cursor))
+ while (amdgpu_vm_pt_descendant(adev, cursor));
+ else
+ amdgpu_vm_pt_ancestor(cursor);
+}
+
+/**
+ * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
+ */
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) \
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)), \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
+ (entry); (entry) = (cursor).entry, \
+ amdgpu_vm_pt_next_dfs((adev), &(cursor)))
+
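Likewise, a hedged sketch of for_each_amdgpu_vm_pt_dfs_safe(): the next entry is fetched before the body runs and children are visited before their parents, so the current entry may be torn down inside the loop, exactly as amdgpu_vm_free_pts() does further down; the wrapper and body here are illustrative:

static void sketch_teardown_all_pts(struct amdgpu_device *adev,
                                    struct amdgpu_vm *vm)
{
        struct amdgpu_vm_pt_cursor cursor;
        struct amdgpu_vm_pt *entry;

        for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
                if (entry->base.bo)
                        amdgpu_bo_unref(&entry->base.bo);
                kvfree(entry->entries); /* safe: cursor already points past entry */
        }
}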
+/**
* amdgpu_vm_get_pd_bo - add the VM PD to a validation list
*
* @vm: vm providing the BOs
@@ -204,15 +614,55 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated,
struct amdgpu_bo_list_entry *entry)
{
- entry->robj = vm->root.base.bo;
entry->priority = 0;
- entry->tv.bo = &entry->robj->tbo;
+ entry->tv.bo = &vm->root.base.bo->tbo;
entry->tv.shared = true;
entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
/**
+ * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
+ *
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
+ *
+ * Move all BOs to the end of LRU and remember their positions to put them
+ * together.
+ */
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ struct ttm_bo_global *glob = adev->mman.bdev.glob;
+ struct amdgpu_vm_bo_base *bo_base;
+
+ if (vm->bulk_moveable) {
+ spin_lock(&glob->lru_lock);
+ ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
+ spin_unlock(&glob->lru_lock);
+ return;
+ }
+
+ memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
+
+ spin_lock(&glob->lru_lock);
+ list_for_each_entry(bo_base, &vm->idle, vm_status) {
+ struct amdgpu_bo *bo = bo_base->bo;
+
+ if (!bo->parent)
+ continue;
+
+ ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
+ if (bo->shadow)
+ ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
+ &vm->lru_bulk_move);
+ }
+ spin_unlock(&glob->lru_lock);
+
+ vm->bulk_moveable = true;
+}
+
+/**
* amdgpu_vm_validate_pt_bos - validate the page table BOs
*
* @adev: amdgpu device pointer
@@ -221,52 +671,44 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
* @param: parameter for the validation callback
*
* Validate the page table BOs on command submission if neccessary.
+ *
+ * Returns:
+ * Validation result.
*/
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo),
void *param)
{
- struct ttm_bo_global *glob = adev->mman.bdev.glob;
struct amdgpu_vm_bo_base *bo_base, *tmp;
int r = 0;
+ vm->bulk_moveable &= list_empty(&vm->evicted);
+
list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
- if (bo->parent) {
- r = validate(param, bo);
- if (r)
- break;
-
- spin_lock(&glob->lru_lock);
- ttm_bo_move_to_lru_tail(&bo->tbo);
- if (bo->shadow)
- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
- spin_unlock(&glob->lru_lock);
- }
+ r = validate(param, bo);
+ if (r)
+ break;
if (bo->tbo.type != ttm_bo_type_kernel) {
- spin_lock(&vm->moved_lock);
- list_move(&bo_base->vm_status, &vm->moved);
- spin_unlock(&vm->moved_lock);
+ amdgpu_vm_bo_moved(bo_base);
} else {
- list_move(&bo_base->vm_status, &vm->relocated);
+ if (vm->use_cpu_for_update)
+ r = amdgpu_bo_kmap(bo, NULL);
+ else
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (r)
+ break;
+ if (bo->shadow) {
+ r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo);
+ if (r)
+ break;
+ }
+ amdgpu_vm_bo_relocated(bo_base);
}
}
- spin_lock(&glob->lru_lock);
- list_for_each_entry(bo_base, &vm->idle, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
-
- if (!bo->parent)
- continue;
-
- ttm_bo_move_to_lru_tail(&bo->tbo);
- if (bo->shadow)
- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
- }
- spin_unlock(&glob->lru_lock);
-
return r;
}
@@ -276,6 +718,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* @vm: VM to check
*
* Check if all VM PDs/PTs are ready for updates
+ *
+ * Returns:
+ * True if eviction list is empty.
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
@@ -286,10 +731,15 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
* amdgpu_vm_clear_bo - initially clear the PDs/PTs
*
* @adev: amdgpu_device pointer
+ * @vm: VM to clear BO from
* @bo: BO to clear
* @level: level this BO is at
+ * @pte_support_ats: indicate ATS support from PTE
*
* Root PD needs to be reserved when calling this.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_bo *bo,
@@ -303,14 +753,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
uint64_t addr;
int r;
- addr = amdgpu_bo_gpu_offset(bo);
entries = amdgpu_bo_size(bo) / 8;
if (pte_support_ats) {
if (level == adev->vm_manager.root_level) {
ats_entries = amdgpu_vm_level_shift(adev, level);
ats_entries += AMDGPU_GPU_PAGE_SHIFT;
- ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+ ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
ats_entries = min(ats_entries, entries);
entries -= ats_entries;
} else {
@@ -321,7 +770,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
ats_entries = 0;
}
- ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+ ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
@@ -331,10 +780,15 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
goto error;
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
+ if (r)
+ return r;
+
r = amdgpu_job_alloc_with_ib(adev, 64, &job);
if (r)
goto error;
+ addr = amdgpu_bo_gpu_offset(bo);
if (ats_entries) {
uint64_t ats_value;
@@ -359,8 +813,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
goto error_free;
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
+ &fence);
if (r)
goto error_free;
@@ -381,110 +835,33 @@ error:
}
/**
- * amdgpu_vm_alloc_levels - allocate the PD/PT levels
+ * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
*
* @adev: amdgpu_device pointer
- * @vm: requested vm
- * @saddr: start of the address range
- * @eaddr: end of the address range
- *
- * Make sure the page directories and page tables are allocated
+ * @vm: requesting vm
+ * @bp: resulting BO allocation parameters
*/
-static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt *parent,
- uint64_t saddr, uint64_t eaddr,
- unsigned level, bool ats)
+static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, struct amdgpu_bo_param *bp)
{
- unsigned shift = amdgpu_vm_level_shift(adev, level);
- unsigned pt_idx, from, to;
- u64 flags;
- int r;
-
- if (!parent->entries) {
- unsigned num_entries = amdgpu_vm_num_entries(adev, level);
-
- parent->entries = kvmalloc_array(num_entries,
- sizeof(struct amdgpu_vm_pt),
- GFP_KERNEL | __GFP_ZERO);
- if (!parent->entries)
- return -ENOMEM;
- memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
- }
-
- from = saddr >> shift;
- to = eaddr >> shift;
- if (from >= amdgpu_vm_num_entries(adev, level) ||
- to >= amdgpu_vm_num_entries(adev, level))
- return -EINVAL;
-
- ++level;
- saddr = saddr & ((1 << shift) - 1);
- eaddr = eaddr & ((1 << shift) - 1);
-
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ memset(bp, 0, sizeof(*bp));
+
+ bp->size = amdgpu_vm_bo_size(adev, level);
+ bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
+ if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 &&
+ adev->flags & AMD_IS_APU)
+ bp->domain |= AMDGPU_GEM_DOMAIN_GTT;
+ bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
+ bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
if (vm->use_cpu_for_update)
- flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- else
- flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_SHADOW);
-
- /* walk over the address space and allocate the page tables */
- for (pt_idx = from; pt_idx <= to; ++pt_idx) {
- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
- struct amdgpu_bo *pt;
-
- if (!entry->base.bo) {
- struct amdgpu_bo_param bp;
-
- memset(&bp, 0, sizeof(bp));
- bp.size = amdgpu_vm_bo_size(adev, level);
- bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = flags;
- bp.type = ttm_bo_type_kernel;
- bp.resv = resv;
- r = amdgpu_bo_create(adev, &bp, &pt);
- if (r)
- return r;
-
- r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
- if (r) {
- amdgpu_bo_unref(&pt->shadow);
- amdgpu_bo_unref(&pt);
- return r;
- }
-
- if (vm->use_cpu_for_update) {
- r = amdgpu_bo_kmap(pt, NULL);
- if (r) {
- amdgpu_bo_unref(&pt->shadow);
- amdgpu_bo_unref(&pt);
- return r;
- }
- }
-
- /* Keep a reference to the root directory to avoid
- * freeing them up in the wrong order.
- */
- pt->parent = amdgpu_bo_ref(parent->base.bo);
-
- amdgpu_vm_bo_base_init(&entry->base, vm, pt);
- }
-
- if (level < AMDGPU_VM_PTB) {
- uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
- uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
- ((1 << shift) - 1);
- r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
- sub_eaddr, level, ats);
- if (r)
- return r;
- }
- }
-
- return 0;
+ bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ else if (!vm->root.base.bo || vm->root.base.bo->shadow)
+ bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
+ bp->type = ttm_bo_type_kernel;
+ if (vm->root.base.bo)
+ bp->resv = vm->root.base.bo->tbo.resv;
}
/**
@@ -495,14 +872,20 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
* @saddr: Start address which needs to be allocated
* @size: Size from start address we need.
*
- * Make sure the page tables are allocated.
+ * Make sure the page directories and page tables are allocated
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
*/
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
uint64_t saddr, uint64_t size)
{
- uint64_t eaddr;
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_bo *pt;
bool ats = false;
+ uint64_t eaddr;
+ int r;
/* validate the parameters */
if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
@@ -511,7 +894,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
eaddr = saddr + size - 1;
if (vm->pte_support_ats)
- ats = saddr < AMDGPU_VA_HOLE_START;
+ ats = saddr < AMDGPU_GMC_HOLE_START;
saddr /= AMDGPU_GPU_PAGE_SIZE;
eaddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -522,8 +905,84 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
return -EINVAL;
}
- return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
- adev->vm_manager.root_level, ats);
+ for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
+ struct amdgpu_vm_pt *entry = cursor.entry;
+ struct amdgpu_bo_param bp;
+
+ if (cursor.level < AMDGPU_VM_PTB) {
+ unsigned num_entries;
+
+ num_entries = amdgpu_vm_num_entries(adev, cursor.level);
+ entry->entries = kvmalloc_array(num_entries,
+ sizeof(*entry->entries),
+ GFP_KERNEL |
+ __GFP_ZERO);
+ if (!entry->entries)
+ return -ENOMEM;
+ }
+
+
+ if (entry->base.bo)
+ continue;
+
+ amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
+
+ r = amdgpu_bo_create(adev, &bp, &pt);
+ if (r)
+ return r;
+
+ r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
+ if (r)
+ goto error_free_pt;
+
+ if (vm->use_cpu_for_update) {
+ r = amdgpu_bo_kmap(pt, NULL);
+ if (r)
+ goto error_free_pt;
+ }
+
+ /* Keep a reference to the root directory to avoid
+ * freeing them up in the wrong order.
+ */
+ pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
+
+ amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+ }
+
+ return 0;
+
+error_free_pt:
+ amdgpu_bo_unref(&pt->shadow);
+ amdgpu_bo_unref(&pt);
+ return r;
+}
+
+/**
+ * amdgpu_vm_free_pts - free PD/PT levels
+ *
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * Free the page directory or page table level and all sub levels.
+ */
+static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_pt *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
+
+ if (entry->base.bo) {
+ entry->base.bo->vm_bo = NULL;
+ list_del(&entry->base.vm_status);
+ amdgpu_bo_unref(&entry->base.bo->shadow);
+ amdgpu_bo_unref(&entry->base.bo);
+ }
+ kvfree(entry->entries);
+ }
+
+ BUG_ON(vm->root.base.bo);
}
/**
@@ -561,6 +1020,15 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
+ *
+ * @ring: ring on which the job will be submitted
+ * @job: job to submit
+ *
+ * Returns:
+ * True if sync is needed.
+ */
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job)
{
@@ -588,19 +1056,17 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
return vm_flush_needed || gds_switch_needed;
}
-static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
-{
- return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
-}
-
/**
* amdgpu_vm_flush - hardware flush the vm
*
* @ring: ring to use for flush
- * @vmid: vmid number to use
- * @pd_addr: address of the page directory
+ * @job: related job
+ * @need_pipe_sync: is pipe sync needed
*
* Emit a VM flush when it is necessary.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
*/
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
@@ -630,7 +1096,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
}
gds_switch_needed &= !!ring->funcs->emit_gds_switch;
- vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+ vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
+ job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
@@ -708,16 +1175,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
* Returns the found bo_va or NULL if none is found
*
* Object has to be reserved!
+ *
+ * Returns:
+ * Found bo_va or NULL.
*/
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo)
{
- struct amdgpu_bo_va *bo_va;
+ struct amdgpu_vm_bo_base *base;
- list_for_each_entry(bo_va, &bo->va, base.bo_list) {
- if (bo_va->base.vm == vm) {
- return bo_va;
- }
+ for (base = bo->vm_bo; base; base = base->next) {
+ if (base->vm != vm)
+ continue;
+
+ return container_of(base, struct amdgpu_bo_va, base);
}
return NULL;
}
@@ -789,7 +1260,10 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
* @addr: the unmapped addr
*
* Look up the physical address of the page that the pte resolves
- * to and return the pointer for the page table entry.
+ * to.
+ *
+ * Returns:
+ * The pointer for the page table entry.
*/
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
@@ -842,6 +1316,17 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
}
}
+
+/**
+ * amdgpu_vm_wait_pd - Wait for PT BOs to be free.
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: related vm
+ * @owner: fence owner
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *owner)
{
@@ -856,6 +1341,22 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
+/**
+ * amdgpu_vm_update_func - helper to call update function
+ *
+ * Calls the update function for both the given BO as well as its shadow.
+ */
+static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params,
+ struct amdgpu_bo *bo,
+ uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr,
+ uint64_t flags)
+{
+ if (bo->shadow)
+ params->func(params, bo->shadow, pe, addr, count, incr, flags);
+ params->func(params, bo, pe, addr, count, incr, flags);
+}
+
/*
* amdgpu_vm_update_pde - update a single level in the hierarchy
*
@@ -883,44 +1384,28 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
pbo = pbo->parent;
level += params->adev->vm_manager.root_level;
- pt = amdgpu_bo_gpu_offset(entry->base.bo);
- flags = AMDGPU_PTE_VALID;
- amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+ amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags);
pde = (entry - parent->entries) * 8;
- if (bo->shadow)
- params->func(params, bo->shadow, pde, pt, 1, 0, flags);
- params->func(params, bo, pde, pt, 1, 0, flags);
+ amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags);
}
/*
- * amdgpu_vm_invalidate_level - mark all PD levels as invalid
+ * amdgpu_vm_invalidate_pds - mark all PDs as invalid
*
- * @parent: parent PD
+ * @adev: amdgpu_device pointer
+ * @vm: related vm
*
* Mark all PD level as invalid after an error.
*/
-static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt *parent,
- unsigned level)
+static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
{
- unsigned pt_idx, num_entries;
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_pt *entry;
- /*
- * Recurse into the subdirectories. This recursion is harmless because
- * we only have a maximum of 5 layers.
- */
- num_entries = amdgpu_vm_num_entries(adev, level);
- for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
- if (!entry->base.bo)
- continue;
-
- if (!entry->base.moved)
- list_move(&entry->base.vm_status, &vm->relocated);
- amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
- }
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)
+ if (entry->base.bo && !entry->base.moved)
+ amdgpu_vm_bo_relocated(&entry->base);
}
/*
@@ -930,7 +1415,9 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
* @vm: requested vm
*
* Makes sure all directories are up to date.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*/
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
@@ -948,14 +1435,6 @@ restart:
params.adev = adev;
if (vm->use_cpu_for_update) {
- struct amdgpu_vm_bo_base *bo_base;
-
- list_for_each_entry(bo_base, &vm->relocated, vm_status) {
- r = amdgpu_bo_kmap(bo_base->bo, NULL);
- if (unlikely(r))
- return r;
- }
-
r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
if (unlikely(r))
return r;
@@ -972,25 +1451,16 @@ restart:
}
while (!list_empty(&vm->relocated)) {
- struct amdgpu_vm_bo_base *bo_base, *parent;
struct amdgpu_vm_pt *pt, *entry;
- struct amdgpu_bo *bo;
- bo_base = list_first_entry(&vm->relocated,
- struct amdgpu_vm_bo_base,
- vm_status);
- bo_base->moved = false;
- list_move(&bo_base->vm_status, &vm->idle);
+ entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt,
+ base.vm_status);
+ amdgpu_vm_bo_idle(&entry->base);
- bo = bo_base->bo->parent;
- if (!bo)
+ pt = amdgpu_vm_pt_parent(entry);
+ if (!pt)
continue;
- parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
- bo_list);
- pt = container_of(parent, struct amdgpu_vm_pt, base);
- entry = container_of(bo_base, struct amdgpu_vm_pt, base);
-
amdgpu_vm_update_pde(&params, vm, pt, entry);
if (!vm->use_cpu_for_update &&
@@ -1009,15 +1479,15 @@ restart:
struct amdgpu_ring *ring;
struct dma_fence *fence;
- ring = container_of(vm->entity.sched, struct amdgpu_ring,
+ ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,
sched);
amdgpu_ring_pad_ib(ring, params.ib);
amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
AMDGPU_FENCE_OWNER_VM, false);
WARN_ON(params.ib->length_dw > ndw);
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_VM, &fence);
+ r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
+ &fence);
if (r)
goto error;
@@ -1032,205 +1502,206 @@ restart:
return 0;
error:
- amdgpu_vm_invalidate_level(adev, vm, &vm->root,
- adev->vm_manager.root_level);
+ amdgpu_vm_invalidate_pds(adev, vm);
amdgpu_job_free(job);
return r;
}
/**
- * amdgpu_vm_find_entry - find the entry for an address
- *
- * @p: see amdgpu_pte_update_params definition
- * @addr: virtual address in question
- * @entry: resulting entry or NULL
- * @parent: parent entry
+ * amdgpu_vm_update_huge - figure out parameters for PTE updates
*
- * Find the vm_pt entry and it's parent for the given address.
+ * Make sure to set the right flags for the PTEs at the desired level.
*/
-void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
- struct amdgpu_vm_pt **entry,
- struct amdgpu_vm_pt **parent)
-{
- unsigned level = p->adev->vm_manager.root_level;
-
- *parent = NULL;
- *entry = &p->vm->root;
- while ((*entry)->entries) {
- unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
+static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
+ struct amdgpu_bo *bo, unsigned level,
+ uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr,
+ uint64_t flags)
- *parent = *entry;
- *entry = &(*entry)->entries[addr >> shift];
- addr &= (1ULL << shift) - 1;
+{
+ if (level != AMDGPU_VM_PTB) {
+ flags |= AMDGPU_PDE_PTE;
+ amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
}
- if (level != AMDGPU_VM_PTB)
- *entry = NULL;
+ amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
}
/**
- * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
+ * amdgpu_vm_fragment - get fragment for PTEs
*
- * @p: see amdgpu_pte_update_params definition
- * @entry: vm_pt entry to check
- * @parent: parent entry
- * @nptes: number of PTEs updated with this operation
- * @dst: destination address where the PTEs should point to
- * @flags: access flags fro the PTEs
+ * @params: see amdgpu_pte_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
*
- * Check if we can update the PD with a huge page.
+ * Returns the first possible fragment for the start and end address.
*/
-static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
- struct amdgpu_vm_pt *entry,
- struct amdgpu_vm_pt *parent,
- unsigned nptes, uint64_t dst,
- uint64_t flags)
+static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params,
+ uint64_t start, uint64_t end, uint64_t flags,
+ unsigned int *frag, uint64_t *frag_end)
{
- uint64_t pde;
+ /**
+ * The MC L1 TLB supports variable sized pages, based on a fragment
+ * field in the PTE. When this field is set to a non-zero value, page
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+ * flags are considered valid for all PTEs within the fragment range
+ * and corresponding mappings are assumed to be physically contiguous.
+ *
+ * The L1 TLB can store a single PTE for the whole fragment,
+ * significantly increasing the space available for translation
+ * caching. This leads to large improvements in throughput when the
+ * TLB is under pressure.
+ *
+ * The L2 TLB distributes small and large fragments into two
+ * asymmetric partitions. The large fragment cache is significantly
+ * larger. Thus, we try to use large fragments wherever possible.
+ * Userspace can support this by aligning virtual base address and
+ * allocation size to the fragment size.
+ *
+ * Starting with Vega10 the fragment size only controls the L1. The L2
+ * is now directly fed with small/huge/giant pages from the walker.
+ */
+ unsigned max_frag;
- /* In the case of a mixed PT the PDE must point to it*/
- if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
- nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
- /* Set the huge page flag to stop scanning at this PDE */
- flags |= AMDGPU_PDE_PTE;
- }
+ if (params->adev->asic_type < CHIP_VEGA10)
+ max_frag = params->adev->vm_manager.fragment_size;
+ else
+ max_frag = 31;
- if (!(flags & AMDGPU_PDE_PTE)) {
- if (entry->huge) {
- /* Add the entry to the relocated list to update it. */
- entry->huge = false;
- list_move(&entry->base.vm_status, &p->vm->relocated);
- }
+ /* system pages are not physically contiguous */
+ if (params->src) {
+ *frag = 0;
+ *frag_end = end;
return;
}
- entry->huge = true;
- amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
-
- pde = (entry - parent->entries) * 8;
- if (parent->base.bo->shadow)
- p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
- p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
+ /* This intentionally wraps around if no bit is set */
+ *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
+ if (*frag >= max_frag) {
+ *frag = max_frag;
+ *frag_end = end & ~((1ULL << max_frag) - 1);
+ } else {
+ *frag_end = start + (1 << *frag);
+ }
}
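A worked example of the fragment computation in amdgpu_vm_fragment() above, with illustrative numbers; max_frag = 9 matches the usual pre-Vega10 vm_manager.fragment_size:

/*
 *   start = 0x200, end = 0x1000 (GPU page-frame numbers), regular mapping:
 *
 *     ffs(0x200) - 1            = 9    (start is aligned to 2^9 pages = 2 MiB)
 *     fls64(0x1000 - 0x200) - 1 = 11   (0xe00 pages fit below 2^12)
 *     *frag     = min(9, 11)    = 9    -> already at max_frag, so kept at 9
 *     *frag_end = 0x1000 & ~0x1ff = 0x1000
 *
 *   Every PTE in [0x200, 0x1000) therefore carries fragment 9, and the L1
 *   TLB can treat the mapping as 1 << (12 + 9) = 2 MiB pages.
 */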
/**
* amdgpu_vm_update_ptes - make sure that page tables are valid
*
* @params: see amdgpu_pte_update_params definition
- * @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
* @dst: destination address to map to, the next dst inside the function
* @flags: mapping flags
*
* Update the page tables in the range @start - @end.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
- uint64_t start, uint64_t end,
- uint64_t dst, uint64_t flags)
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags)
{
struct amdgpu_device *adev = params->adev;
- const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
+ struct amdgpu_vm_pt_cursor cursor;
+ uint64_t frag_start = start, frag_end;
+ unsigned int frag;
- uint64_t addr, pe_start;
- struct amdgpu_bo *pt;
- unsigned nptes;
+ /* figure out the initial fragment */
+ amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
- /* walk over the address space and update the page tables */
- for (addr = start; addr < end; addr += nptes,
- dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
- struct amdgpu_vm_pt *entry, *parent;
+ /* walk over the address space and update the PTs */
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+ while (cursor.pfn < end) {
+ struct amdgpu_bo *pt = cursor.entry->base.bo;
+ unsigned shift, parent_shift, mask;
+ uint64_t incr, entry_end, pe_start;
- amdgpu_vm_get_entry(params, addr, &entry, &parent);
- if (!entry)
+ if (!pt)
return -ENOENT;
- if ((addr & ~mask) == (end & ~mask))
- nptes = end - addr;
- else
- nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
- amdgpu_vm_handle_huge_pages(params, entry, parent,
- nptes, dst, flags);
- /* We don't need to update PTEs for huge pages */
- if (entry->huge)
+ /* The root level can't be a huge page */
+ if (cursor.level == adev->vm_manager.root_level) {
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
continue;
+ }
- pt = entry->base.bo;
- pe_start = (addr & mask) * 8;
- if (pt->shadow)
- params->func(params, pt->shadow, pe_start, dst, nptes,
- AMDGPU_GPU_PAGE_SIZE, flags);
- params->func(params, pt, pe_start, dst, nptes,
- AMDGPU_GPU_PAGE_SIZE, flags);
- }
-
- return 0;
-}
+ /* First check if the entry is already handled */
+ if (cursor.pfn < frag_start) {
+ cursor.entry->huge = true;
+ amdgpu_vm_pt_next(adev, &cursor);
+ continue;
+ }
-/*
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
- *
- * @params: see amdgpu_pte_update_params definition
- * @vm: requested vm
- * @start: first PTE to handle
- * @end: last PTE to handle
- * @dst: addr those PTEs should point to
- * @flags: hw mapping flags
- * Returns 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- uint64_t start, uint64_t end,
- uint64_t dst, uint64_t flags)
-{
- /**
- * The MC L1 TLB supports variable sized pages, based on a fragment
- * field in the PTE. When this field is set to a non-zero value, page
- * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
- * flags are considered valid for all PTEs within the fragment range
- * and corresponding mappings are assumed to be physically contiguous.
- *
- * The L1 TLB can store a single PTE for the whole fragment,
- * significantly increasing the space available for translation
- * caching. This leads to large improvements in throughput when the
- * TLB is under pressure.
- *
- * The L2 TLB distributes small and large fragments into two
- * asymmetric partitions. The large fragment cache is significantly
- * larger. Thus, we try to use large fragments wherever possible.
- * Userspace can support this by aligning virtual base address and
- * allocation size to the fragment size.
- */
- unsigned max_frag = params->adev->vm_manager.fragment_size;
- int r;
+ /* If it isn't already handled it can't be a huge page */
+ if (cursor.entry->huge) {
+ /* Add the entry to the relocated list to update it. */
+ cursor.entry->huge = false;
+ amdgpu_vm_bo_relocated(&cursor.entry->base);
+ }
- /* system pages are non continuously */
- if (params->src || !(flags & AMDGPU_PTE_VALID))
- return amdgpu_vm_update_ptes(params, start, end, dst, flags);
-
- while (start != end) {
- uint64_t frag_flags, frag_end;
- unsigned frag;
-
- /* This intentionally wraps around if no bit is set */
- frag = min((unsigned)ffs(start) - 1,
- (unsigned)fls64(end - start) - 1);
- if (frag >= max_frag) {
- frag_flags = AMDGPU_PTE_FRAG(max_frag);
- frag_end = end & ~((1ULL << max_frag) - 1);
- } else {
- frag_flags = AMDGPU_PTE_FRAG(frag);
- frag_end = start + (1 << frag);
+ shift = amdgpu_vm_level_shift(adev, cursor.level);
+ parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
+ if (adev->asic_type < CHIP_VEGA10) {
+ /* No huge page support before GMC v9 */
+ if (cursor.level != AMDGPU_VM_PTB) {
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
+ continue;
+ }
+ } else if (frag < shift) {
+ /* We can't use this level when the fragment size is
+ * smaller than the address shift. Go to the next
+ * child entry and try again.
+ */
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
+ continue;
+ } else if (frag >= parent_shift) {
+ /* If the fragment size is even larger than the parent
+ * shift we should go up one level and check it again.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -ENOENT;
+ continue;
}
- r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
- flags | frag_flags);
- if (r)
- return r;
+ /* Looks good so far, calculate parameters for the update */
+ incr = AMDGPU_GPU_PAGE_SIZE << shift;
+ mask = amdgpu_vm_entries_mask(adev, cursor.level);
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
+ entry_end = (mask + 1) << shift;
+ entry_end += cursor.pfn & ~(entry_end - 1);
+ entry_end = min(entry_end, end);
+
+ do {
+ uint64_t upd_end = min(entry_end, frag_end);
+ unsigned nptes = (upd_end - frag_start) >> shift;
+
+ amdgpu_vm_update_huge(params, pt, cursor.level,
+ pe_start, dst, nptes, incr,
+ flags | AMDGPU_PTE_FRAG(frag));
+
+ pe_start += nptes * 8;
+ dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
+
+ frag_start = upd_end;
+ if (frag_start >= frag_end) {
+ /* figure out the next fragment */
+ amdgpu_vm_fragment(params, frag_start, end,
+ flags, &frag, &frag_end);
+ if (frag < shift)
+ break;
+ }
+ } while (frag_start < entry_end);
- dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
- start = frag_end;
+ if (frag >= shift)
+ amdgpu_vm_pt_next(adev, &cursor);
}
return 0;
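
The per-entry arithmetic in the walk above (incr, pe_start, entry_end) can be checked with a small stand-alone sketch. The parameters below, 4 KiB GPU pages, 512 entries per table and shift 0 for the leaf level, are assumed values for illustration, not something queried from hardware.

#include <stdio.h>
#include <stdint.h>

#define GPU_PAGE_SIZE 4096ULL   /* assumption: AMDGPU_GPU_PAGE_SIZE */
#define ENTRIES       512ULL    /* assumption: PTEs per page table block */

int main(void)
{
        uint64_t pfn = 0x12345, end = 0x13000;    /* sample walk position */
        unsigned shift = 0;                       /* leaf (PTB) level */
        uint64_t mask = ENTRIES - 1;
        uint64_t incr = GPU_PAGE_SIZE << shift;   /* bytes mapped per PTE */
        uint64_t pe_start = ((pfn >> shift) & mask) * 8;
        uint64_t entry_end = (mask + 1) << shift;

        entry_end += pfn & ~(entry_end - 1);      /* end of this PT block */
        if (entry_end > end)
                entry_end = end;

        printf("incr=%llu pe_start=0x%llx entry_end=0x%llx\n",
               (unsigned long long)incr, (unsigned long long)pe_start,
               (unsigned long long)entry_end);
        return 0;
}
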
@@ -1250,7 +1721,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
@@ -1290,11 +1763,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.func = amdgpu_vm_cpu_set_ptes;
params.pages_addr = pages_addr;
- return amdgpu_vm_frag_ptes(&params, start, last + 1,
- addr, flags);
+ return amdgpu_vm_update_ptes(&params, start, last + 1,
+ addr, flags);
}
- ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+ ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
nptes = last - start + 1;
@@ -1326,7 +1799,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
ndw += ncmds * 10;
/* extra commands for begin/end fragments */
- ndw += 2 * 10 * adev->vm_manager.fragment_size;
+ if (vm->root.base.bo->shadow)
+ ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
+ else
+ ndw += 2 * 10 * adev->vm_manager.fragment_size;
params.func = amdgpu_vm_do_set_ptes;
}
@@ -1367,14 +1843,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_free;
- r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
+ r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
if (r)
goto error_free;
amdgpu_ring_pad_ib(ring, params.ib);
WARN_ON(params.ib->length_dw > ndw);
- r = amdgpu_job_submit(job, ring, &vm->entity,
- AMDGPU_FENCE_OWNER_VM, &f);
+ r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f);
if (r)
goto error_free;
@@ -1402,7 +1877,9 @@ error_free:
*
* Split the mapping into smaller chunks so that each update fits
* into a SDMA IB.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
@@ -1455,7 +1932,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (nodes) {
addr = nodes->start << PAGE_SHIFT;
max_entries = (nodes->size - pfn) *
- (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
} else {
addr = 0;
max_entries = S64_MAX;
@@ -1466,7 +1943,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
max_entries = min(max_entries, 16ull * 1024ull);
for (count = 1;
- count < max_entries / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
++count) {
uint64_t idx = pfn + count;
@@ -1480,7 +1957,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
dma_addr = pages_addr;
} else {
addr = pages_addr[pfn];
- max_entries = count * (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
} else if (flags & AMDGPU_PTE_VALID) {
@@ -1495,7 +1972,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (r)
return r;
- pfn += (last - start + 1) / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
+ pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
if (nodes && nodes->size == pfn) {
pfn = 0;
++nodes;
@@ -1515,7 +1992,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
* @clear: if true clear the entries
*
* Fill in the page table entries for @bo_va.
- * Returns 0 for success, -EINVAL for failure.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
*/
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
@@ -1531,18 +2010,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
uint64_t flags;
int r;
- if (clear || !bo_va->base.bo) {
+ if (clear || !bo) {
mem = NULL;
nodes = NULL;
exclusive = NULL;
} else {
struct ttm_dma_tt *ttm;
- mem = &bo_va->base.bo->tbo.mem;
+ mem = &bo->tbo.mem;
nodes = mem->mm_node;
if (mem->mem_type == TTM_PL_TT) {
- ttm = container_of(bo_va->base.bo->tbo.ttm,
- struct ttm_dma_tt, ttm);
+ ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
pages_addr = ttm->dma_address;
}
exclusive = reservation_object_get_excl(bo->tbo.resv);
@@ -1580,10 +2058,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
amdgpu_asic_flush_hdp(adev, NULL);
}
- spin_lock(&vm->moved_lock);
- list_del_init(&bo_va->base.vm_status);
- spin_unlock(&vm->moved_lock);
-
/* If the BO is not in its preferred location add it back to
* the evicted list so that it gets validated again on the
* next command submission.
@@ -1592,9 +2066,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
uint32_t mem_type = bo->tbo.mem.mem_type;
if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
- list_add_tail(&bo_va->base.vm_status, &vm->evicted);
+ amdgpu_vm_bo_evicted(&bo_va->base);
else
- list_add(&bo_va->base.vm_status, &vm->idle);
+ amdgpu_vm_bo_idle(&bo_va->base);
+ } else {
+ amdgpu_vm_bo_done(&bo_va->base);
}
list_splice_init(&bo_va->invalids, &bo_va->valids);
@@ -1610,6 +2086,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
/**
* amdgpu_vm_update_prt_state - update the global PRT state
+ *
+ * @adev: amdgpu_device pointer
*/
static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
{
@@ -1624,6 +2102,8 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_get - add a PRT user
+ *
+ * @adev: amdgpu_device pointer
*/
static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
{
@@ -1636,6 +2116,8 @@ static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_put - drop a PRT user
+ *
+ * @adev: amdgpu_device pointer
*/
static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
{
@@ -1645,6 +2127,9 @@ static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_cb - callback for updating the PRT status
+ *
+ * @fence: fence for the callback
+ * @_cb: the callback function
*/
static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
{
@@ -1656,6 +2141,9 @@ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
/**
* amdgpu_vm_add_prt_cb - add callback for updating the PRT status
+ *
+ * @adev: amdgpu_device pointer
+ * @fence: fence for the callback
*/
static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
struct dma_fence *fence)
@@ -1747,9 +2235,11 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
* or if an error occurred)
*
* Make sure all freed BOs are cleared in the PT.
- * Returns 0 for success.
- *
* PTs have to be reserved and mutex must be locked!
+ *
+ * Returns:
+ * 0 for success.
+ *
*/
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@@ -1765,7 +2255,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
- if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
+ if (vm->pte_support_ats &&
+ mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -1794,10 +2285,11 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @sync: sync object to add fences to
*
* Make sure all BOs which are moved are updated in the PTs.
- * Returns 0 for success.
+ *
+ * Returns:
+ * 0 for success.
*
* PTs have to be reserved!
*/
@@ -1805,40 +2297,40 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdgpu_bo_va *bo_va, *tmp;
- struct list_head moved;
+ struct reservation_object *resv;
bool clear;
int r;
- INIT_LIST_HEAD(&moved);
- spin_lock(&vm->moved_lock);
- list_splice_init(&vm->moved, &moved);
- spin_unlock(&vm->moved_lock);
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ /* Per VM BOs never need to be cleared in the page tables */
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ return r;
+ }
- list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
- struct reservation_object *resv = bo_va->base.bo->tbo.resv;
+ spin_lock(&vm->invalidated_lock);
+ while (!list_empty(&vm->invalidated)) {
+ bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
+ base.vm_status);
+ resv = bo_va->base.bo->tbo.resv;
+ spin_unlock(&vm->invalidated_lock);
- /* Per VM BOs never need to bo cleared in the page tables */
- if (resv == vm->root.base.bo->tbo.resv)
- clear = false;
/* Try to reserve the BO to avoid clearing its ptes */
- else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
+ if (!amdgpu_vm_debug && reservation_object_trylock(resv))
clear = false;
/* Somebody else is using the BO right now */
else
clear = true;
r = amdgpu_vm_bo_update(adev, bo_va, clear);
- if (r) {
- spin_lock(&vm->moved_lock);
- list_splice(&moved, &vm->moved);
- spin_unlock(&vm->moved_lock);
+ if (r)
return r;
- }
- if (!clear && resv != vm->root.base.bo->tbo.resv)
+ if (!clear)
reservation_object_unlock(resv);
-
+ spin_lock(&vm->invalidated_lock);
}
+ spin_unlock(&vm->invalidated_lock);
return 0;
}
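
The reserve-or-clear decision above boils down to a trylock with a conservative fallback. A hedged sketch of that pattern, using pthread_mutex_trylock() as a stand-in for reservation_object_trylock() (build with -lpthread):

#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t resv = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
        bool clear;

        if (pthread_mutex_trylock(&resv) == 0)
                clear = false;          /* we own the BO, can keep its PTEs */
        else
                clear = true;           /* somebody else holds it, clear PTEs */

        printf("clear = %s\n", clear ? "true" : "false");

        if (!clear)
                pthread_mutex_unlock(&resv);
        return 0;
}
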
@@ -1852,7 +2344,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
*
* Add @bo into the requested vm.
* Add @bo to the list of bos associated with the vm
- * Returns newly added bo_va or NULL for failure
+ *
+ * Returns:
+ * Newly added bo_va or NULL for failure
*
* Object has to be reserved!
*/
@@ -1901,9 +2395,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
!bo_va->base.moved) {
- spin_lock(&vm->moved_lock);
list_move(&bo_va->base.vm_status, &vm->moved);
- spin_unlock(&vm->moved_lock);
}
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
@@ -1915,10 +2407,13 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
* @bo_va: bo_va to store the address
* @saddr: where to map the BO
* @offset: requested offset in the BO
+ * @size: BO size in bytes
* @flags: attributes of pages (read/write/valid/etc.)
*
 * Add a mapping of the BO at the specified addr into the VM.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@@ -1976,11 +2471,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
* @bo_va: bo_va to store the address
* @saddr: where to map the BO
* @offset: requested offset in the BO
+ * @size: BO size in bytes
* @flags: attributes of pages (read/write/valid/etc.)
*
 * Add a mapping of the BO at the specified addr into the VM. Replace existing
* mappings as we do so.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@@ -2037,7 +2535,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 * @saddr: where the BO is mapped
*
 * Remove a mapping of the BO at the specified addr from the VM.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@@ -2091,7 +2591,9 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
* @size: size of the range
*
* Remove all mappings in a range, split them as appropriate.
- * Returns 0 for success, error for failure.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*/
int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@@ -2188,8 +2690,13 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
* amdgpu_vm_bo_lookup_mapping - find mapping by address
*
* @vm: the requested VM
+ * @addr: the address
*
* Find a mapping by it's address.
+ *
+ * Returns:
+ * The amdgpu_bo_va_mapping matching for addr or NULL
+ *
*/
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
uint64_t addr)
@@ -2198,6 +2705,35 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
}
/**
+ * amdgpu_vm_bo_trace_cs - trace all reserved mappings
+ *
+ * @vm: the requested vm
+ * @ticket: CS ticket
+ *
+ * Trace all mappings of BOs reserved during a command submission.
+ */
+void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+
+ if (!trace_amdgpu_vm_bo_cs_enabled())
+ return;
+
+ for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
+ mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
+ if (mapping->bo_va && mapping->bo_va->base.bo) {
+ struct amdgpu_bo *bo;
+
+ bo = mapping->bo_va->base.bo;
+ if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket)
+ continue;
+ }
+
+ trace_amdgpu_vm_bo_cs(mapping);
+ }
+}
+
+/**
 * amdgpu_vm_bo_rmv - remove a bo from a specific vm
*
* @adev: amdgpu_device pointer
@@ -2211,13 +2747,27 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va)
{
struct amdgpu_bo_va_mapping *mapping, *next;
+ struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_vm_bo_base **base;
+
+ if (bo) {
+ if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
+ vm->bulk_moveable = false;
+
+ for (base = &bo_va->base.bo->vm_bo; *base;
+ base = &(*base)->next) {
+ if (*base != &bo_va->base)
+ continue;
- list_del(&bo_va->base.bo_list);
+ *base = bo_va->base.next;
+ break;
+ }
+ }
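
The bo->vm_bo walk above uses the classic pointer-to-pointer idiom so that the head of the singly linked list needs no special case. A generic, self-contained sketch of the same idiom (names are illustrative only):

#include <stdio.h>

struct node {
        int id;
        struct node *next;
};

static void unlink_node(struct node **head, struct node *victim)
{
        struct node **pp;

        for (pp = head; *pp; pp = &(*pp)->next) {
                if (*pp != victim)
                        continue;
                *pp = victim->next;     /* works for head and interior nodes */
                break;
        }
}

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct node *head = &a, *n;

        unlink_node(&head, &b);
        for (n = head; n; n = n->next)
                printf("%d ", n->id);   /* prints: 1 3 */
        printf("\n");
        return 0;
}
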
- spin_lock(&vm->moved_lock);
+ spin_lock(&vm->invalidated_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->moved_lock);
+ spin_unlock(&vm->invalidated_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2241,8 +2791,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
* @adev: amdgpu_device pointer
- * @vm: requested vm
* @bo: amdgpu buffer object
+ * @evicted: whether the BO was evicted
*
* Mark @bo as invalid.
*/
@@ -2255,33 +2805,35 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
if (bo->parent && bo->parent->shadow == bo)
bo = bo->parent;
- list_for_each_entry(bo_base, &bo->va, bo_list) {
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
- bool was_moved = bo_base->moved;
- bo_base->moved = true;
if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
- if (bo->tbo.type == ttm_bo_type_kernel)
- list_move(&bo_base->vm_status, &vm->evicted);
- else
- list_move_tail(&bo_base->vm_status,
- &vm->evicted);
+ amdgpu_vm_bo_evicted(bo_base);
continue;
}
- if (was_moved)
+ if (bo_base->moved)
continue;
+ bo_base->moved = true;
- if (bo->tbo.type == ttm_bo_type_kernel) {
- list_move(&bo_base->vm_status, &vm->relocated);
- } else {
- spin_lock(&bo_base->vm->moved_lock);
- list_move(&bo_base->vm_status, &vm->moved);
- spin_unlock(&bo_base->vm->moved_lock);
- }
+ if (bo->tbo.type == ttm_bo_type_kernel)
+ amdgpu_vm_bo_relocated(bo_base);
+ else if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
+ amdgpu_vm_bo_moved(bo_base);
+ else
+ amdgpu_vm_bo_invalidated(bo_base);
}
}
+/**
+ * amdgpu_vm_get_block_size - calculate VM page table size as power of two
+ *
+ * @vm_size: VM size
+ *
+ * Returns:
+ * VM page table size as a power of two
+ */
static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
{
/* Total bits covered by PD + PTs */
@@ -2299,24 +2851,52 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
* amdgpu_vm_adjust_size - adjust vm size, block size and fragment size
*
* @adev: amdgpu_device pointer
- * @vm_size: the default vm size if it's set auto
+ * @min_vm_size: the minimum vm size in GB if it's set auto
+ * @fragment_size_default: Default PTE fragment size
+ * @max_level: max VMPT level
+ * @max_bits: max address space size in bits
+ *
*/
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
uint32_t fragment_size_default, unsigned max_level,
unsigned max_bits)
{
+ unsigned int max_size = 1 << (max_bits - 30);
+ unsigned int vm_size;
uint64_t tmp;
/* adjust vm size first */
if (amdgpu_vm_size != -1) {
- unsigned max_size = 1 << (max_bits - 30);
-
vm_size = amdgpu_vm_size;
if (vm_size > max_size) {
dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",
amdgpu_vm_size, max_size);
vm_size = max_size;
}
+ } else {
+ struct sysinfo si;
+ unsigned int phys_ram_gb;
+
+ /* Optimal VM size depends on the amount of physical
+ * RAM available. Underlying requirements and
+ * assumptions:
+ *
+ * - Need to map system memory and VRAM from all GPUs
+ * - VRAM from other GPUs not known here
+ * - Assume VRAM <= system memory
+ * - On GFX8 and older, VM space can be segmented for
+ * different MTYPEs
+ * - Need to allow room for fragmentation, guard pages etc.
+ *
+ * This adds up to a rough guess of system memory x3.
+ * Round up to power of two to maximize the available
+ * VM size with the given page table size.
+ */
+ si_meminfo(&si);
+ phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
+ (1 << 30) - 1) >> 30;
+ vm_size = roundup_pow_of_two(
+ min(max(phys_ram_gb * 3, min_vm_size), max_size));
}
adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
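
The heuristic above can be reproduced with a short user-space sketch. The inputs (64 GiB of RAM, a 16 GiB minimum, a 48-bit address space) are assumptions for illustration, and roundup_pow_of_two() is open-coded since this is not kernel code:

#include <stdio.h>
#include <stdint.h>

static uint64_t roundup_pow_of_two(uint64_t x)
{
        uint64_t r = 1;

        while (r < x)
                r <<= 1;
        return r;
}

int main(void)
{
        uint64_t phys_ram_gb = 64;               /* assumed system RAM in GiB */
        uint64_t min_vm_size = 16;               /* assumed driver default */
        uint64_t max_size = 1ULL << (48 - 30);   /* 48-bit VA space, in GiB */
        uint64_t vm_size = phys_ram_gb * 3;      /* rough guess: 3x system RAM */

        if (vm_size < min_vm_size)
                vm_size = min_vm_size;
        if (vm_size > max_size)
                vm_size = max_size;
        vm_size = roundup_pow_of_two(vm_size);   /* maximize usable VM space */

        printf("vm_size = %llu GiB\n", (unsigned long long)vm_size);
        return 0;
}
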
@@ -2361,27 +2941,40 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
adev->vm_manager.fragment_size);
}
+static struct amdgpu_retryfault_hashtable *init_fault_hash(void)
+{
+ struct amdgpu_retryfault_hashtable *fault_hash;
+
+ fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL);
+ if (!fault_hash)
+ return fault_hash;
+
+ INIT_CHASH_TABLE(fault_hash->hash,
+ AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+ spin_lock_init(&fault_hash->lock);
+ fault_hash->count = 0;
+
+ return fault_hash;
+}
+
/**
* amdgpu_vm_init - initialize a vm instance
*
* @adev: amdgpu_device pointer
* @vm: requested vm
 * @vm_context: Indicates if it is a GFX or Compute context
+ * @pasid: Process address space identifier
*
* Init @vm fields.
+ *
+ * Returns:
+ * 0 for success, error for failure.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid)
{
struct amdgpu_bo_param bp;
struct amdgpu_bo *root;
- const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
- AMDGPU_VM_PTE_COUNT(adev) * 8);
- unsigned ring_instance;
- struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
- unsigned long size;
- uint64_t flags;
int r, i;
vm->va = RB_ROOT_CACHED;
@@ -2389,19 +2982,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->reserved_vmid[i] = NULL;
INIT_LIST_HEAD(&vm->evicted);
INIT_LIST_HEAD(&vm->relocated);
- spin_lock_init(&vm->moved_lock);
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
+ INIT_LIST_HEAD(&vm->invalidated);
+ spin_lock_init(&vm->invalidated_lock);
INIT_LIST_HEAD(&vm->freed);
/* create scheduler entity for page table updates */
-
- ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
- ring_instance %= adev->vm_manager.vm_pte_num_rings;
- ring = adev->vm_manager.vm_pte_rings[ring_instance];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
- r = drm_sched_entity_init(&ring->sched, &vm->entity,
- rq, NULL);
+ r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs,
+ adev->vm_manager.vm_pte_num_rqs, NULL);
if (r)
return r;
@@ -2419,24 +3008,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+ WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
vm->last_update = NULL;
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- if (vm->use_cpu_for_update)
- flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- else
- flags |= AMDGPU_GEM_CREATE_SHADOW;
-
- size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
- memset(&bp, 0, sizeof(bp));
- bp.size = size;
- bp.byte_align = align;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.flags = flags;
- bp.type = ttm_bo_type_kernel;
- bp.resv = NULL;
+ amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp);
+ if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
+ bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
r = amdgpu_bo_create(adev, &bp, &root);
if (r)
goto error_free_sched_entity;
@@ -2467,6 +3045,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->pasid = pasid;
}
+ vm->fault_hash = init_fault_hash();
+ if (!vm->fault_hash) {
+ r = -ENOMEM;
+ goto error_free_root;
+ }
+
INIT_KFIFO(vm->faults);
vm->fault_credit = 16;
@@ -2481,7 +3065,7 @@ error_free_root:
vm->root.base.bo = NULL;
error_free_sched_entity:
- drm_sched_entity_fini(&ring->sched, &vm->entity);
+ drm_sched_entity_destroy(&vm->entity);
return r;
}
@@ -2489,6 +3073,9 @@ error_free_sched_entity:
/**
* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
*
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ *
* This only works on GFX VMs that don't have any BOs added and no
* page tables allocated yet.
*
@@ -2498,12 +3085,12 @@ error_free_sched_entity:
* - pasid (old PASID is released, because compute manages its own PASIDs)
*
* Reinitializes the page directory to reflect the changed ATS
- * setting. May leave behind an unused shadow BO for the page
- * directory when switching from SDMA updates to CPU updates.
+ * setting.
*
- * Returns 0 for success, -errno for errors.
+ * Returns:
+ * 0 for success, -errno for errors.
*/
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid)
{
bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
int r;
@@ -2515,7 +3102,20 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
/* Sanity checks */
if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
r = -EINVAL;
- goto error;
+ goto unreserve_bo;
+ }
+
+ if (pasid) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+ r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
+ GFP_ATOMIC);
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+ if (r == -ENOSPC)
+ goto unreserve_bo;
+ r = 0;
}
/* Check if PD needs to be reinitialized and do it before
@@ -2526,7 +3126,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
adev->vm_manager.root_level,
pte_support_ats);
if (r)
- goto error;
+ goto free_idr;
}
/* Update VM state */
@@ -2535,7 +3135,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pte_support_ats = pte_support_ats;
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+ WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->pasid) {
@@ -2545,42 +3145,52 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+ /* Free the original amdgpu-allocated PASID;
+ * it will be replaced with a KFD-allocated PASID.
+ */
+ amdgpu_pasid_free(vm->pasid);
vm->pasid = 0;
}
-error:
+ /* Free the shadow bo for compute VM */
+ amdgpu_bo_unref(&vm->root.base.bo->shadow);
+
+ if (pasid)
+ vm->pasid = pasid;
+
+ goto unreserve_bo;
+
+free_idr:
+ if (pasid) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+ idr_remove(&adev->vm_manager.pasid_idr, pasid);
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+ }
+unreserve_bo:
amdgpu_bo_unreserve(vm->root.base.bo);
return r;
}
/**
- * amdgpu_vm_free_levels - free PD/PT levels
- *
- * @adev: amdgpu device structure
- * @parent: PD/PT starting level to free
- * @level: level of parent structure
+ * amdgpu_vm_release_compute - release a compute vm
+ * @adev: amdgpu_device pointer
+ * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
*
- * Free the page directory or page table level and all sub levels.
+ * This is the counterpart of amdgpu_vm_make_compute. It decouples the
+ * compute pasid from the vm. Compute should stop using the vm after this call.
*/
-static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
- struct amdgpu_vm_pt *parent,
- unsigned level)
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
+ if (vm->pasid) {
+ unsigned long flags;
- if (parent->base.bo) {
- list_del(&parent->base.bo_list);
- list_del(&parent->base.vm_status);
- amdgpu_bo_unref(&parent->base.bo->shadow);
- amdgpu_bo_unref(&parent->base.bo);
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+ idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
-
- if (parent->entries)
- for (i = 0; i < num_entries; i++)
- amdgpu_vm_free_levels(adev, &parent->entries[i],
- level + 1);
-
- kvfree(parent->entries);
+ vm->pasid = 0;
}
/**
@@ -2604,7 +3214,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
/* Clear pending page faults from IH when the VM is destroyed */
while (kfifo_get(&vm->faults, &fault))
- amdgpu_ih_clear_fault(adev, fault);
+ amdgpu_vm_clear_fault(vm->fault_hash, fault);
if (vm->pasid) {
unsigned long flags;
@@ -2614,7 +3224,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
- drm_sched_entity_fini(vm->entity.sched, &vm->entity);
+ kfree(vm->fault_hash);
+ vm->fault_hash = NULL;
+
+ drm_sched_entity_destroy(&vm->entity);
if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
dev_err(adev->dev, "still active bo inside vm\n");
@@ -2640,8 +3253,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
if (r) {
dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
} else {
- amdgpu_vm_free_levels(adev, &vm->root,
- adev->vm_manager.root_level);
+ amdgpu_vm_free_pts(adev, vm);
amdgpu_bo_unreserve(root);
}
amdgpu_bo_unref(&root);
@@ -2656,8 +3268,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @pasid: PASID do identify the VM
*
- * This function is expected to be called in interrupt context. Returns
- * true if there was fault credit, false otherwise
+ * This function is expected to be called in interrupt context.
+ *
+ * Returns:
+ * True if there was fault credit, false otherwise
*/
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid)
@@ -2702,7 +3316,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
adev->vm_manager.seqno[i] = 0;
- atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
spin_lock_init(&adev->vm_manager.prt_lock);
atomic_set(&adev->vm_manager.num_prt_users, 0);
@@ -2711,7 +3324,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
- if (amdgpu_vm_is_large_bar(adev))
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
@@ -2741,6 +3354,16 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
amdgpu_vmid_mgr_fini(adev);
}
+/**
+ * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
+ *
+ * @dev: drm device pointer
+ * @data: drm_amdgpu_vm
+ * @filp: drm file pointer
+ *
+ * Returns:
+ * 0 for success, -errno for errors.
+ */
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
union drm_amdgpu_vm *args = data;
@@ -2764,3 +3387,117 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
+
+/**
+ * amdgpu_vm_get_task_info - Extracts task info for a PASID.
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: PASID identifier for VM
+ * @task_info: task_info to fill.
+ */
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
+ struct amdgpu_task_info *task_info)
+{
+ struct amdgpu_vm *vm;
+
+ spin_lock(&adev->vm_manager.pasid_lock);
+
+ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ if (vm)
+ *task_info = vm->task_info;
+
+ spin_unlock(&adev->vm_manager.pasid_lock);
+}
+
+/**
+ * amdgpu_vm_set_task_info - Sets VMs task info.
+ *
+ * @vm: vm for which to set the info
+ */
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+{
+ if (!vm->task_info.pid) {
+ vm->task_info.pid = current->pid;
+ get_task_comm(vm->task_info.task_name, current);
+
+ if (current->group_leader->mm == current->mm) {
+ vm->task_info.tgid = current->group_leader->pid;
+ get_task_comm(vm->task_info.process_name, current->group_leader);
+ }
+ }
+}
+
+/**
+ * amdgpu_vm_add_fault - Add a page fault record to fault hash table
+ *
+ * @fault_hash: fault hash table
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a retry page fault interrupt is
+ * received. If this is a new page fault, it will be added to a hash
+ * table. The return value indicates whether this is a new fault, or
+ * a fault that was already known and is already being handled.
+ *
+ * If there are too many pending page faults, this will fail. Retry
+ * interrupts should be ignored in this case until there is enough
+ * free space.
+ *
+ * Returns 0 if the fault was added, 1 if the fault was already known,
+ * -ENOSPC if there are too many pending faults.
+ */
+int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
+{
+ unsigned long flags;
+ int r = -ENOSPC;
+
+ if (WARN_ON_ONCE(!fault_hash))
+ /* Should have been allocated in amdgpu_vm_init */
+ return r;
+
+ spin_lock_irqsave(&fault_hash->lock, flags);
+
+ /* Only let the hash table fill up to 50% for best performance */
+ if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
+ goto unlock_out;
+
+ r = chash_table_copy_in(&fault_hash->hash, key, NULL);
+ if (!r)
+ fault_hash->count++;
+
+ /* chash_table_copy_in should never fail unless we're losing count */
+ WARN_ON_ONCE(r < 0);
+
+unlock_out:
+ spin_unlock_irqrestore(&fault_hash->lock, flags);
+ return r;
+}
+
+/**
+ * amdgpu_vm_clear_fault - Remove a page fault record
+ *
+ * @fault_hash: fault hash table
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a page fault has been handled. Any
+ * future interrupt with this key will be processed as a new
+ * page fault.
+ */
+void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
+{
+ unsigned long flags;
+ int r;
+
+ if (!fault_hash)
+ return;
+
+ spin_lock_irqsave(&fault_hash->lock, flags);
+
+ r = chash_table_remove(&fault_hash->hash, key, NULL);
+ if (!WARN_ON_ONCE(r < 0)) {
+ fault_hash->count--;
+ WARN_ON_ONCE(fault_hash->count < 0);
+ }
+
+ spin_unlock_irqrestore(&fault_hash->lock, flags);
+}
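
The 64-bit key used by the two functions above packs the PASID into the top 16 bits and the page-aligned fault address into the rest, per the AMDGPU_VM_FAULT macros in amdgpu_vm.h. A stand-alone sketch of the encode/decode round trip, with made-up PASID and address values and local copies of the macros:

#include <stdio.h>
#include <stdint.h>

#define VM_FAULT(pasid, addr)  (((uint64_t)(pasid) << 48) | (addr))
#define VM_FAULT_PASID(fault)  ((uint64_t)(fault) >> 48)
#define VM_FAULT_ADDR(fault)   ((uint64_t)(fault) & 0xfffffffff000ULL)

int main(void)
{
        uint64_t key = VM_FAULT(0x8001, 0x7f12345000ULL);  /* sample values */

        printf("key=0x%llx pasid=0x%llx addr=0x%llx\n",
               (unsigned long long)key,
               (unsigned long long)VM_FAULT_PASID(key),
               (unsigned long long)VM_FAULT_ADDR(key));
        return 0;
}
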
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 061b99a18cb8..2a8898d19c8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -29,6 +29,8 @@
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo_driver.h>
+#include <linux/chash.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -48,9 +50,6 @@ struct amdgpu_bo_list_entry;
/* number of entries in page table */
#define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
-/* PTBs (Page Table Blocks) need to be aligned to 32K */
-#define AMDGPU_VM_PTB_ALIGN_SIZE 32768
-
#define AMDGPU_PTE_VALID (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_SNOOPED (1ULL << 2)
@@ -103,19 +102,6 @@ struct amdgpu_bo_list_entry;
/* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20)
-/* VA hole for 48bit addresses on Vega10 */
-#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL
-#define AMDGPU_VA_HOLE_END 0xffff800000000000ULL
-
-/*
- * Hardware is programmed as if the hole doesn't exists with start and end
- * address values.
- *
- * This mask is used to remove the upper 16bits of the VA and so come up with
- * the linear addr value.
- */
-#define AMDGPU_VA_HOLE_MASK 0x0000ffffffffffffULL
-
/* max vmids dedicated for process */
#define AMDGPU_VM_MAX_RESERVED_VMID 1
@@ -143,7 +129,7 @@ struct amdgpu_vm_bo_base {
struct amdgpu_bo *bo;
/* protected by bo being reserved */
- struct list_head bo_list;
+ struct amdgpu_vm_bo_base *next;
/* protected by spinlock */
struct list_head vm_status;
@@ -160,10 +146,46 @@ struct amdgpu_vm_pt {
struct amdgpu_vm_pt *entries;
};
+/* provided by hw blocks that can write ptes, e.g., sdma */
+struct amdgpu_vm_pte_funcs {
+ /* number of dw to reserve per operation */
+ unsigned copy_pte_num_dw;
+
+ /* copy pte entries from GART */
+ void (*copy_pte)(struct amdgpu_ib *ib,
+ uint64_t pe, uint64_t src,
+ unsigned count);
+
+ /* write pte one entry at a time with addr mapping */
+ void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr);
+ /* for linear pte/pde updates without addr mapping */
+ void (*set_pte_pde)(struct amdgpu_ib *ib,
+ uint64_t pe,
+ uint64_t addr, unsigned count,
+ uint32_t incr, uint64_t flags);
+};
+
#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)
+
+struct amdgpu_task_info {
+ char process_name[TASK_COMM_LEN];
+ char task_name[TASK_COMM_LEN];
+ pid_t pid;
+ pid_t tgid;
+};
+
+#define AMDGPU_PAGEFAULT_HASH_BITS 8
+struct amdgpu_retryfault_hashtable {
+ DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+ spinlock_t lock;
+ int count;
+};
+
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@@ -174,13 +196,16 @@ struct amdgpu_vm {
/* PT BOs which relocated and their parent need an update */
struct list_head relocated;
- /* BOs moved, but not yet updated in the PT */
+ /* per VM BOs moved, but not yet updated in the PT */
struct list_head moved;
- spinlock_t moved_lock;
/* All BOs of this VM not currently in the state machine */
struct list_head idle;
+ /* regular invalidated BOs, but not yet updated in the PT */
+ struct list_head invalidated;
+ spinlock_t invalidated_lock;
+
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
@@ -215,6 +240,15 @@ struct amdgpu_vm {
/* Valid while the PD is reserved or fenced */
uint64_t pd_phys_addr;
+
+ /* Some basic info about the task */
+ struct amdgpu_task_info task_info;
+
+ /* Store positions of a group of BOs */
+ struct ttm_lru_bulk_move lru_bulk_move;
+ /* mark whether we can do the bulk move */
+ bool bulk_moveable;
+ struct amdgpu_retryfault_hashtable *fault_hash;
};
struct amdgpu_vm_manager {
@@ -233,10 +267,9 @@ struct amdgpu_vm_manager {
/* vram base address for page table entry */
u64 vram_base_offset;
/* vm pte handling */
- const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
- struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
- unsigned vm_pte_num_rings;
- atomic_t vm_pte_next_ring;
+ const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
+ struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_rqs;
/* partial resident texture handling */
spinlock_t prt_lock;
@@ -255,11 +288,16 @@ struct amdgpu_vm_manager {
spinlock_t pasid_lock;
};
+#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
+#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
+#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+
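
The vm_pte_funcs table and the dispatch macros above follow the usual ops-table pattern: a backend (SDMA, CPU) fills in the callbacks and the VM code calls through them without knowing which engine writes the PTEs. A hypothetical, stripped-down sketch of that pattern (struct pte_ops and sdma_set_pte_pde() are invented names):

#include <stdio.h>
#include <stdint.h>

struct pte_ops {
        void (*set_pte_pde)(uint64_t pe, uint64_t addr, unsigned count,
                            uint32_t incr, uint64_t flags);
};

static void sdma_set_pte_pde(uint64_t pe, uint64_t addr, unsigned count,
                             uint32_t incr, uint64_t flags)
{
        printf("pe=0x%llx addr=0x%llx count=%u incr=%u flags=0x%llx\n",
               (unsigned long long)pe, (unsigned long long)addr,
               count, incr, (unsigned long long)flags);
}

static const struct pte_ops sdma_ops = { .set_pte_pde = sdma_set_pte_pde };

/* mirrors the amdgpu_vm_set_pte_pde() macro style of dispatch */
#define vm_set_pte_pde(ops, pe, addr, count, incr, flags) \
        ((ops)->set_pte_pde((pe), (addr), (count), (incr), (flags)))

int main(void)
{
        vm_set_pte_pde(&sdma_ops, 0x1000, 0x200000, 512, 4096, 0x1);
        return 0;
}
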
void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid);
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid);
@@ -307,9 +345,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
uint64_t saddr, uint64_t size);
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
uint64_t addr);
+void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
-void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
uint32_t fragment_size_default, unsigned max_level,
unsigned max_bits);
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
@@ -317,4 +356,16 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
+ struct amdgpu_task_info *task_info);
+
+void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
+
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+
+int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
+
+void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index b6333f92ba45..3f9d5d00c9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -97,38 +97,56 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
}
/**
- * amdgpu_vram_mgr_bo_invisible_size - CPU invisible BO size
+ * amdgpu_vram_mgr_bo_visible_size - CPU visible BO size
*
* @bo: &amdgpu_bo buffer object (must be in VRAM)
*
* Returns:
- * How much of the given &amdgpu_bo buffer object lies in CPU invisible VRAM.
+ * How much of the given &amdgpu_bo buffer object lies in CPU visible VRAM.
*/
-u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo)
+u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_mem_reg *mem = &bo->tbo.mem;
struct drm_mm_node *nodes = mem->mm_node;
unsigned pages = mem->num_pages;
- u64 usage = 0;
+ u64 usage;
- if (adev->gmc.visible_vram_size == adev->gmc.real_vram_size)
- return 0;
+ if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+ return amdgpu_bo_size(bo);
if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
- return amdgpu_bo_size(bo);
+ return 0;
- while (nodes && pages) {
- usage += nodes->size << PAGE_SHIFT;
- usage -= amdgpu_vram_mgr_vis_size(adev, nodes);
- pages -= nodes->size;
- ++nodes;
- }
+ for (usage = 0; nodes && pages; pages -= nodes->size, nodes++)
+ usage += amdgpu_vram_mgr_vis_size(adev, nodes);
return usage;
}
/**
+ * amdgpu_vram_mgr_virt_start - update virtual start address
+ *
+ * @mem: ttm_mem_reg to update
+ * @node: just allocated node
+ *
+ * Calculate a virtual BO start address to easily check if everything is CPU
+ * accessible.
+ */
+static void amdgpu_vram_mgr_virt_start(struct ttm_mem_reg *mem,
+ struct drm_mm_node *node)
+{
+ unsigned long start;
+
+ start = node->start + node->size;
+ if (start > mem->num_pages)
+ start -= mem->num_pages;
+ else
+ start = 0;
+ mem->start = max(mem->start, start);
+}
+
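
The virtual-start trick above can be checked with a few lines of arithmetic: each node contributes start + size minus the BO size (clamped at zero), and the maximum over all nodes becomes mem->start, so comparing mem->start against the visible-VRAM threshold proves CPU accessibility. A sketch with assumed numbers:

#include <stdio.h>

int main(void)
{
        unsigned long num_pages = 256;          /* assumed BO size in pages */
        unsigned long node_start = 1000, node_size = 128;  /* assumed node */
        unsigned long mem_start = 0, start;

        start = node_start + node_size;
        if (start > num_pages)
                start -= num_pages;             /* highest page minus BO size */
        else
                start = 0;
        if (start > mem_start)
                mem_start = start;

        printf("virtual start = %lu\n", mem_start);  /* 1128 - 256 = 872 */
        return 0;
}
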
+/**
* amdgpu_vram_mgr_new - allocate new ranges
*
* @man: TTM memory type manager
@@ -180,10 +198,25 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
pages_left = mem->num_pages;
spin_lock(&mgr->lock);
- for (i = 0; i < num_nodes; ++i) {
+ for (i = 0; pages_left >= pages_per_node; ++i) {
+ unsigned long pages = rounddown_pow_of_two(pages_left);
+
+ r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
+ pages_per_node, 0,
+ place->fpfn, lpfn,
+ mode);
+ if (unlikely(r))
+ break;
+
+ usage += nodes[i].size << PAGE_SHIFT;
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
+ amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
+ pages_left -= pages;
+ }
+
+ for (; pages_left; ++i) {
unsigned long pages = min(pages_left, pages_per_node);
uint32_t alignment = mem->page_alignment;
- unsigned long start;
if (pages == pages_per_node)
alignment = pages_per_node;
@@ -197,16 +230,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
usage += nodes[i].size << PAGE_SHIFT;
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
-
- /* Calculate a virtual BO start address to easily check if
- * everything is CPU accessible.
- */
- start = nodes[i].start + nodes[i].size;
- if (start > mem->num_pages)
- start -= mem->num_pages;
- else
- start = 0;
- mem->start = max(mem->start, start);
+ amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
}
spin_unlock(&mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
new file mode 100644
index 000000000000..897afbb348c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/list.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+
+
+static DEFINE_MUTEX(xgmi_mutex);
+
+#define AMDGPU_MAX_XGMI_HIVE 8
+#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
+
+struct amdgpu_hive_info {
+ uint64_t hive_id;
+ struct list_head device_list;
+};
+
+static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
+static unsigned hive_count = 0;
+
+static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
+{
+ int i;
+ struct amdgpu_hive_info *tmp;
+
+ if (!adev->gmc.xgmi.hive_id)
+ return NULL;
+ for (i = 0 ; i < hive_count; ++i) {
+ tmp = &xgmi_hives[i];
+ if (tmp->hive_id == adev->gmc.xgmi.hive_id)
+ return tmp;
+ }
+ if (i >= AMDGPU_MAX_XGMI_HIVE)
+ return NULL;
+
+ /* initialize a new hive if it doesn't exist yet */
+ tmp = &xgmi_hives[hive_count++];
+ tmp->hive_id = adev->gmc.xgmi.hive_id;
+ INIT_LIST_HEAD(&tmp->device_list);
+ return tmp;
+}
+
+int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
+{
+ struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
+ struct amdgpu_hive_info *hive;
+ struct amdgpu_xgmi *entry;
+ struct amdgpu_device *tmp_adev;
+
+ int count = 0, ret = -EINVAL;
+
+ if ((adev->asic_type < CHIP_VEGA20) ||
+ (adev->flags & AMD_IS_APU))
+ return 0;
+ adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
+ adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
+
+ memset(&tmp_topology[0], 0, sizeof(tmp_topology));
+ mutex_lock(&xgmi_mutex);
+ hive = amdgpu_get_xgmi_hive(adev);
+ if (!hive)
+ goto exit;
+
+ list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
+ list_for_each_entry(entry, &hive->device_list, head)
+ tmp_topology[count++].device_id = entry->device_id;
+
+ ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
+ if (ret) {
+ dev_err(adev->dev,
+ "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+ adev->gmc.xgmi.device_id,
+ adev->gmc.xgmi.hive_id, ret);
+ goto exit;
+ }
+ /* Each psp needs to set the latest topology */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
+ if (ret) {
+ dev_err(tmp_adev->dev,
+ "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
+ tmp_adev->gmc.xgmi.device_id,
+ tmp_adev->gmc.xgmi.hive_id, ret);
+ /* To do : continue with some node failed or disable the whole hive */
+ break;
+ }
+ }
+ if (!ret)
+ dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
+ adev->gmc.xgmi.physical_node_id,
+ adev->gmc.xgmi.hive_id);
+
+exit:
+ mutex_unlock(&xgmi_mutex);
+ return ret;
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index d702fb8e3427..60e2447e12c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -28,6 +28,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
#include "atom.h"
#include "atombios_encoders.h"
#include "atombios_dp.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 7fbad2f5f0bd..79220a91abe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -49,10 +49,10 @@
#include "gmc/gmc_7_1_d.h"
#include "gmc/gmc_7_1_sh_mask.h"
-MODULE_FIRMWARE("radeon/bonaire_smc.bin");
-MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
-MODULE_FIRMWARE("radeon/hawaii_smc.bin");
-MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_smc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_smc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin");
#define MC_CG_ARB_FREQ_F0 0x0a
#define MC_CG_ARB_FREQ_F1 0x0b
@@ -951,12 +951,12 @@ static void ci_apply_state_adjust_rules(struct amdgpu_device *adev,
else
pi->battery_state = false;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
- if (adev->pm.dpm.ac_power == false) {
+ if (adev->pm.ac_power == false) {
for (i = 0; i < ps->performance_level_count; i++) {
if (ps->performance_levels[i].mclk > max_limits->mclk)
ps->performance_levels[i].mclk = max_limits->mclk;
@@ -4078,7 +4078,7 @@ static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -4127,7 +4127,7 @@ static int ci_enable_vce_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -4160,7 +4160,7 @@ static int ci_enable_samu_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -4191,7 +4191,7 @@ static int ci_enable_acp_dpm(struct amdgpu_device *adev, bool enable)
const struct amdgpu_clock_and_voltage_limits *max_limits;
int i;
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -5815,7 +5815,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
goto out;
@@ -5846,8 +5846,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)
adev->pm.dpm.priv = pi;
pi->sys_pcie_mask =
- (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
- CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK;
pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
@@ -6278,12 +6277,12 @@ static int ci_dpm_sw_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230,
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230,
&adev->pm.dpm.thermal.irq);
if (ret)
return ret;
- ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231,
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231,
&adev->pm.dpm.thermal.irq);
if (ret)
return ret;
@@ -6767,6 +6766,19 @@ static int ci_dpm_read_sensor(void *handle, int idx,
}
}
+static int ci_set_powergating_by_smu(void *handle,
+ uint32_t block_type, bool gate)
+{
+ switch (block_type) {
+ case AMD_IP_BLOCK_TYPE_UVD:
+ ci_dpm_powergate_uvd(handle, gate);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
@@ -6804,7 +6816,7 @@ static const struct amd_pm_funcs ci_dpm_funcs = {
.debugfs_print_current_performance_level = &ci_dpm_debugfs_print_current_performance_level,
.force_performance_level = &ci_dpm_force_performance_level,
.vblank_too_short = &ci_dpm_vblank_too_short,
- .powergate_uvd = &ci_dpm_powergate_uvd,
+ .set_powergating_by_smu = &ci_set_powergating_by_smu,
.set_fan_control_mode = &ci_dpm_set_fan_control_mode,
.get_fan_control_mode = &ci_dpm_get_fan_control_mode,
.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent,
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 8ff4c60d1b59..f41f5f57e9f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1476,7 +1476,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;
WREG32_PCIE(ixPCIE_LC_CNTL4, tmp);
- mdelay(100);
+ msleep(100);
/* linkctl */
pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
@@ -2002,10 +2002,12 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
if (amdgpu_dpm == -1)
- amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
- else
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -2014,8 +2016,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
- amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;
@@ -2023,10 +2023,12 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
if (amdgpu_dpm == -1)
- amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
- else
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -2035,8 +2037,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
- amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;
@@ -2044,6 +2044,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -2053,8 +2055,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block);
- amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
+
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;
@@ -2063,6 +2064,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
+ amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -2072,8 +2075,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
- amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 44d10c2172f6..b5775c6a857b 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -276,7 +276,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
- entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
@@ -318,7 +318,7 @@ static int cik_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
@@ -332,7 +332,7 @@ static int cik_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
amdgpu_irq_remove_domain(adev);
return 0;
@@ -468,8 +468,7 @@ static const struct amdgpu_ih_funcs cik_ih_funcs = {
static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
- if (adev->irq.ih_funcs == NULL)
- adev->irq.ih_funcs = &cik_ih_funcs;
+ adev->irq.ih_funcs = &cik_ih_funcs;
}
const struct amdgpu_ip_block_version cik_ih_ip_block =
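
cik_ih now passes &adev->irq.ih into amdgpu_ih_ring_init()/amdgpu_ih_ring_fini() instead of letting the helpers assume one implicit ring, which keeps them reusable should a device later carry more than one interrupt ring. A stub-level sketch of that API shape, with placeholder types and sizes rather than amdgpu's:

#include <stdbool.h>
#include <stdlib.h>

struct ih_ring {
        unsigned int ring_size;
        unsigned int *ring;
        bool use_bus_addr;
};

struct device_ctx {
        struct ih_ring ih;              /* could grow ih1, ih2, ... later */
};

/* The helper operates on whichever ring it is handed. */
static int ih_ring_init(struct device_ctx *dev, struct ih_ring *ih,
                        unsigned int size, bool use_bus_addr)
{
        (void)dev;
        ih->ring_size = size;
        ih->use_bus_addr = use_bus_addr;
        ih->ring = calloc(size / sizeof(*ih->ring), sizeof(*ih->ring));
        return ih->ring ? 0 : -1;
}

static void ih_ring_fini(struct device_ctx *dev, struct ih_ring *ih)
{
        (void)dev;
        free(ih->ring);
        ih->ring = NULL;
}

int main(void)
{
        struct device_ctx dev = { .ih = { 0 } };

        if (ih_ring_init(&dev, &dev.ih, 64 * 1024, false))
                return 1;
        ih_ring_fini(&dev, &dev.ih);
        return 0;
}
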
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index a7576255cc30..b918c8886b75 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -54,16 +54,16 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
static int cik_sdma_soft_reset(void *handle);
-MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
-MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
-MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
-MODULE_FIRMWARE("radeon/hawaii_sdma1.bin");
-MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
-MODULE_FIRMWARE("radeon/kaveri_sdma1.bin");
-MODULE_FIRMWARE("radeon/kabini_sdma.bin");
-MODULE_FIRMWARE("radeon/kabini_sdma1.bin");
-MODULE_FIRMWARE("radeon/mullins_sdma.bin");
-MODULE_FIRMWARE("radeon/mullins_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma.bin");
+MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma.bin");
+MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");
u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
@@ -132,9 +132,9 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
for (i = 0; i < adev->sdma.num_instances; i++) {
if (i == 0)
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
else
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma1.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);
err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);
if (err)
goto out;
@@ -177,9 +177,8 @@ static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
+ return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) & 0x3fffc) >> 2;
}
/**
@@ -192,9 +191,8 @@ static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)
static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me],
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
(lower_32_bits(ring->wptr) << 2) & 0x3fffc);
}
@@ -248,7 +246,7 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
u32 ref_and_mask;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK;
else
ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK;
@@ -972,19 +970,19 @@ static int cik_sdma_sw_init(void *handle)
}
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
@@ -1290,8 +1288,10 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &cik_sdma_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = {
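
cik_sdma_set_ring_funcs() now records each ring's instance index in ring->me, so hot paths such as get_wptr/set_wptr index sdma_offsets[ring->me] directly instead of comparing ring pointers on every call. A toy version of the same idea, with illustrative names:

#include <stdio.h>

#define NUM_INSTANCES 2

struct ring {
        unsigned int me;                /* cached instance index */
};

struct sdma_instance {
        struct ring ring;
};

static const unsigned int sdma_offsets[NUM_INSTANCES] = { 0x0000, 0x0800 };

/* Hot path: a plain array index, no pointer comparisons. */
static unsigned int ring_reg_offset(const struct ring *ring)
{
        return sdma_offsets[ring->me];
}

int main(void)
{
        struct sdma_instance sdma[NUM_INSTANCES];
        unsigned int i;

        for (i = 0; i < NUM_INSTANCES; i++)
                sdma[i].ring.me = i;    /* set once, alongside the ring funcs */

        printf("instance 1 register offset: 0x%x\n", ring_reg_offset(&sdma[1].ring));
        return 0;
}
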
@@ -1370,10 +1370,8 @@ static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
{
- if (adev->mman.buffer_funcs == NULL) {
- adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
- adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
- }
+ adev->mman.buffer_funcs = &cik_sdma_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
@@ -1386,16 +1384,16 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ struct drm_gpu_scheduler *sched;
unsigned i;
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->vm_manager.vm_pte_rings[i] =
- &adev->sdma.instance[i].ring;
-
- adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_rqs[i] =
+ &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
}
+ adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version cik_sdma_ip_block =
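
The VM page-table path now stores scheduler run queues in vm_pte_rqs (the DRM_SCHED_PRIORITY_KERNEL queue of each SDMA ring's scheduler) rather than raw ring pointers, so page-table updates can be scheduled through entities attached to those queues instead of being tied to a specific ring. A reduced sketch of that indirection using stand-in types:

#include <stdio.h>

#define NUM_INSTANCES 2
enum sched_priority { PRIO_MIN, PRIO_NORMAL, PRIO_KERNEL, PRIO_COUNT };

struct sched_rq { const char *name; };

struct gpu_scheduler {
        struct sched_rq sched_rq[PRIO_COUNT];   /* one run queue per priority */
};

struct sdma_ring {
        struct gpu_scheduler sched;
};

struct vm_manager {
        struct sched_rq *vm_pte_rqs[NUM_INSTANCES];
        unsigned int vm_pte_num_rqs;
};

int main(void)
{
        struct sdma_ring sdma[NUM_INSTANCES];
        struct vm_manager vm = { 0 };
        unsigned int i;

        for (i = 0; i < NUM_INSTANCES; i++) {
                sdma[i].sched.sched_rq[PRIO_KERNEL].name = "kernel";
                /* Point VM updates at the kernel-priority queue of each instance. */
                vm.vm_pte_rqs[i] = &sdma[i].sched.sched_rq[PRIO_KERNEL];
        }
        vm.vm_pte_num_rqs = NUM_INSTANCES;

        printf("PTE jobs use %u '%s' run queues\n",
               vm.vm_pte_num_rqs, vm.vm_pte_rqs[0]->name);
        return 0;
}
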
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 960c29e17da6..df5ac4d85a00 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -255,7 +255,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
- entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
@@ -297,7 +297,7 @@ static int cz_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
@@ -311,7 +311,7 @@ static int cz_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
amdgpu_irq_remove_domain(adev);
return 0;
@@ -449,8 +449,7 @@ static const struct amdgpu_ih_funcs cz_ih_funcs = {
static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
- if (adev->irq.ih_funcs == NULL)
- adev->irq.ih_funcs = &cz_ih_funcs;
+ adev->irq.ih_funcs = &cz_ih_funcs;
}
const struct amdgpu_ip_block_version cz_ih_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index ada241bfeee9..4cfecdce29a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -31,6 +31,7 @@
#include "atombios_encoders.h"
#include "amdgpu_pll.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
#include "dce_v10_0.h"
#include "dce/dce_10_0_d.h"
@@ -41,6 +42,8 @@
#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1855,15 +1858,14 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
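
amdgpu_bo_pin() no longer reports the GPU address through an out parameter; the display code pins only on the non-atomic path and then reads the address with amdgpu_bo_gpu_offset() in both cases. A compressed sketch of the new calling sequence, with stub types standing in for the BO API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct bo { uint64_t gpu_addr; bool pinned; };

static int bo_pin(struct bo *bo)                 /* no out-parameter anymore */
{
        bo->pinned = true;
        bo->gpu_addr = 0x100000;                 /* placeholder placement */
        return 0;
}

static uint64_t bo_gpu_offset(const struct bo *bo)
{
        return bo->gpu_addr;                     /* valid once pinned */
}

static int set_base(struct bo *fb_bo, bool atomic, uint64_t *fb_location)
{
        if (!atomic) {                           /* atomic flips reuse the pin */
                if (bo_pin(fb_bo))
                        return -1;
        }
        *fb_location = bo_gpu_offset(fb_bo);     /* one query serves both paths */
        return 0;
}

int main(void)
{
        struct bo fb = { 0 };
        uint64_t loc = 0;

        if (set_base(&fb, false, &loc))
                return 1;
        printf("scanout buffer at 0x%llx\n", (unsigned long long)loc);
        return 0;
}
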
@@ -1941,6 +1943,17 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
default:
DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name));
@@ -2370,13 +2383,14 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v10_0_lock_cursor(crtc, true);
@@ -2732,19 +2746,19 @@ static int dce_v10_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
- for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r)
return r;
@@ -3556,8 +3570,7 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev)
{
- if (adev->mode_info.funcs == NULL)
- adev->mode_info.funcs = &dce_v10_0_display_funcs;
+ adev->mode_info.funcs = &dce_v10_0_display_funcs;
}
static const struct amdgpu_irq_src_funcs dce_v10_0_crtc_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index a5b96eac3033..7c868916d90f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -31,6 +31,7 @@
#include "atombios_encoders.h"
#include "amdgpu_pll.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
#include "dce_v11_0.h"
#include "dce/dce_11_0_d.h"
@@ -41,6 +42,8 @@
#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);
static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1897,15 +1900,14 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
@@ -1983,6 +1985,17 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2);
+ fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2);
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2);
+#ifdef __BIG_ENDIAN
+ fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP,
+ ENDIAN_8IN32);
+#endif
+ break;
default:
DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name));
@@ -2449,13 +2462,14 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v11_0_lock_cursor(crtc, true);
@@ -2853,19 +2867,19 @@ static int dce_v11_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
- for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);
if (r)
return r;
@@ -3688,8 +3702,7 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev)
{
- if (adev->mode_info.funcs == NULL)
- adev->mode_info.funcs = &dce_v11_0_display_funcs;
+ adev->mode_info.funcs = &dce_v11_0_display_funcs;
}
static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 394cc1e8fe20..17eaaba36017 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -30,6 +30,7 @@
#include "atombios_encoders.h"
#include "amdgpu_pll.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
@@ -1811,15 +1812,14 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
@@ -1888,6 +1888,16 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) |
+ GRPH_FORMAT(GRPH_FORMAT_ARGB8888));
+ fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) |
+ GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R));
+#ifdef __BIG_ENDIAN
+ fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32);
+#endif
+ break;
default:
DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name));
@@ -2263,13 +2273,14 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v6_0_lock_cursor(crtc, true);
@@ -2605,19 +2616,19 @@ static int dce_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq);
if (r)
return r;
@@ -3365,8 +3376,7 @@ static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev)
{
- if (adev->mode_info.funcs == NULL)
- adev->mode_info.funcs = &dce_v6_0_display_funcs;
+ adev->mode_info.funcs = &dce_v6_0_display_funcs;
}
static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index c9b9ab8f1b05..8c0576978d36 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -31,6 +31,7 @@
#include "atombios_encoders.h"
#include "amdgpu_pll.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
#include "dce_v8_0.h"
#include "dce/dce_8_0_d.h"
@@ -1786,15 +1787,14 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic) {
- fb_location = amdgpu_bo_gpu_offset(abo);
- } else {
- r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
+ if (!atomic) {
+ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(abo);
return -EINVAL;
}
}
+ fb_location = amdgpu_bo_gpu_offset(abo);
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
amdgpu_bo_unreserve(abo);
@@ -1865,6 +1865,16 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
/* Greater 8 bpc fb needs to bypass hw-lut to retain precision */
bypass_lut = true;
break;
+ case DRM_FORMAT_XBGR8888:
+ case DRM_FORMAT_ABGR8888:
+ fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) |
+ (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT));
+ fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) |
+ (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT));
+#ifdef __BIG_ENDIAN
+ fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT);
+#endif
+ break;
default:
DRM_ERROR("Unsupported screen format %s\n",
drm_get_format_name(target_fb->format->format, &format_name));
@@ -2274,13 +2284,14 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,
return ret;
}
- ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr);
+ ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_unreserve(aobj);
if (ret) {
DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);
drm_gem_object_put_unlocked(obj);
return ret;
}
+ amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
dce_v8_0_lock_cursor(crtc, true);
@@ -2632,19 +2643,19 @@ static int dce_v8_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq);
if (r)
return r;
}
for (i = 8; i < 20; i += 2) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq);
if (r)
return r;
}
/* HPD hotplug */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq);
if (r)
return r;
@@ -3447,8 +3458,7 @@ static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev)
{
- if (adev->mode_info.funcs == NULL)
- adev->mode_info.funcs = &dce_v8_0_display_funcs;
+ adev->mode_info.funcs = &dce_v8_0_display_funcs;
}
static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index dbf2ccd0c744..fdace004544d 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -36,6 +36,7 @@
#include "dce_v10_0.h"
#include "dce_v11_0.h"
#include "dce_virtual.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
#define DCE_VIRTUAL_VBLANK_PERIOD 16666666
@@ -269,25 +270,18 @@ static int dce_virtual_early_init(void *handle)
static struct drm_encoder *
dce_virtual_encoder(struct drm_connector *connector)
{
- int enc_id = connector->encoder_ids[0];
struct drm_encoder *encoder;
int i;
- for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
- if (connector->encoder_ids[i] == 0)
- break;
-
- encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
- if (!encoder)
- continue;
-
+ drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)
return encoder;
}
/* pick the first one */
- if (enc_id)
- return drm_encoder_find(connector->dev, NULL, enc_id);
+ drm_connector_for_each_possible_encoder(connector, encoder, i)
+ return encoder;
+
return NULL;
}
@@ -378,7 +372,7 @@ static int dce_virtual_sw_init(void *handle)
int r, i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 229, &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq);
if (r)
return r;
@@ -634,7 +628,7 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
drm_connector_register(connector);
/* link them */
- drm_mode_connector_attach_encoder(connector, encoder);
+ drm_connector_attach_encoder(connector, encoder);
return 0;
}
@@ -655,8 +649,7 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
static void dce_virtual_set_display_funcs(struct amdgpu_device *adev)
{
- if (adev->mode_info.funcs == NULL)
- adev->mode_info.funcs = &dce_virtual_display_funcs;
+ adev->mode_info.funcs = &dce_virtual_display_funcs;
}
static int dce_virtual_pageflip(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index cd6bf291a853..d76eb27945dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -44,30 +44,30 @@ static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev);
-MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
-MODULE_FIRMWARE("radeon/tahiti_me.bin");
-MODULE_FIRMWARE("radeon/tahiti_ce.bin");
-MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
-
-MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
-MODULE_FIRMWARE("radeon/pitcairn_me.bin");
-MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
-MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
-
-MODULE_FIRMWARE("radeon/verde_pfp.bin");
-MODULE_FIRMWARE("radeon/verde_me.bin");
-MODULE_FIRMWARE("radeon/verde_ce.bin");
-MODULE_FIRMWARE("radeon/verde_rlc.bin");
-
-MODULE_FIRMWARE("radeon/oland_pfp.bin");
-MODULE_FIRMWARE("radeon/oland_me.bin");
-MODULE_FIRMWARE("radeon/oland_ce.bin");
-MODULE_FIRMWARE("radeon/oland_rlc.bin");
-
-MODULE_FIRMWARE("radeon/hainan_pfp.bin");
-MODULE_FIRMWARE("radeon/hainan_me.bin");
-MODULE_FIRMWARE("radeon/hainan_ce.bin");
-MODULE_FIRMWARE("radeon/hainan_rlc.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_pfp.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_me.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_ce.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/pitcairn_pfp.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_me.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_ce.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/verde_pfp.bin");
+MODULE_FIRMWARE("amdgpu/verde_me.bin");
+MODULE_FIRMWARE("amdgpu/verde_ce.bin");
+MODULE_FIRMWARE("amdgpu/verde_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/oland_pfp.bin");
+MODULE_FIRMWARE("amdgpu/oland_me.bin");
+MODULE_FIRMWARE("amdgpu/oland_ce.bin");
+MODULE_FIRMWARE("amdgpu/oland_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/hainan_pfp.bin");
+MODULE_FIRMWARE("amdgpu/hainan_me.bin");
+MODULE_FIRMWARE("amdgpu/hainan_ce.bin");
+MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");
static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer);
@@ -335,7 +335,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -346,7 +346,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -357,7 +357,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -368,7 +368,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -1552,7 +1552,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev)
adev->gfx.config.double_offchip_lds_buf = 0;
}
-static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
+static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
{
u32 gb_addr_config = 0;
u32 mc_shared_chmap, mc_arb_ramcfg;
@@ -3094,15 +3094,15 @@ static int gfx_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i, r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -3175,7 +3175,7 @@ static int gfx_v6_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gfx_v6_0_gpu_init(adev);
+ gfx_v6_0_constants_init(adev);
r = gfx_v6_0_rlc_resume(adev);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 42b6144c1fd5..0e72bc09939a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -57,36 +57,36 @@ static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
-MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
-MODULE_FIRMWARE("radeon/bonaire_me.bin");
-MODULE_FIRMWARE("radeon/bonaire_ce.bin");
-MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
-MODULE_FIRMWARE("radeon/bonaire_mec.bin");
-
-MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
-MODULE_FIRMWARE("radeon/hawaii_me.bin");
-MODULE_FIRMWARE("radeon/hawaii_ce.bin");
-MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
-MODULE_FIRMWARE("radeon/hawaii_mec.bin");
-
-MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
-MODULE_FIRMWARE("radeon/kaveri_me.bin");
-MODULE_FIRMWARE("radeon/kaveri_ce.bin");
-MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
-MODULE_FIRMWARE("radeon/kaveri_mec.bin");
-MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
-
-MODULE_FIRMWARE("radeon/kabini_pfp.bin");
-MODULE_FIRMWARE("radeon/kabini_me.bin");
-MODULE_FIRMWARE("radeon/kabini_ce.bin");
-MODULE_FIRMWARE("radeon/kabini_rlc.bin");
-MODULE_FIRMWARE("radeon/kabini_mec.bin");
-
-MODULE_FIRMWARE("radeon/mullins_pfp.bin");
-MODULE_FIRMWARE("radeon/mullins_me.bin");
-MODULE_FIRMWARE("radeon/mullins_ce.bin");
-MODULE_FIRMWARE("radeon/mullins_rlc.bin");
-MODULE_FIRMWARE("radeon/mullins_mec.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_me.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_ce.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_me.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_ce.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_me.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_ce.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_mec.bin");
+MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin");
+
+MODULE_FIRMWARE("amdgpu/kabini_pfp.bin");
+MODULE_FIRMWARE("amdgpu/kabini_me.bin");
+MODULE_FIRMWARE("amdgpu/kabini_ce.bin");
+MODULE_FIRMWARE("amdgpu/kabini_rlc.bin");
+MODULE_FIRMWARE("amdgpu/kabini_mec.bin");
+
+MODULE_FIRMWARE("amdgpu/mullins_pfp.bin");
+MODULE_FIRMWARE("amdgpu/mullins_me.bin");
+MODULE_FIRMWARE("amdgpu/mullins_ce.bin");
+MODULE_FIRMWARE("amdgpu/mullins_rlc.bin");
+MODULE_FIRMWARE("amdgpu/mullins_mec.bin");
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
@@ -925,7 +925,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -933,7 +933,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -941,7 +941,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -949,7 +949,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
if (err)
goto out;
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -958,7 +958,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
goto out;
if (adev->asic_type == CHIP_KAVERI) {
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -967,7 +967,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
@@ -1886,14 +1886,14 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev)
}
/**
- * gfx_v7_0_gpu_init - setup the 3D engine
+ * gfx_v7_0_constants_init - setup the 3D engine
*
* @adev: amdgpu_device pointer
*
- * Configures the 3D engine and tiling configuration
- * registers so that the 3D engine is usable.
+ * Initialize the gfx constants such as the 3D engine, tiling configuration
+ * registers, maximum number of quad pipes, render backends...
*/
-static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
+static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
{
u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base;
u32 tmp;
@@ -4170,15 +4170,6 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
uint32_t gws_base, uint32_t gws_size,
uint32_t oa_base, uint32_t oa_size)
{
- gds_base = gds_base >> AMDGPU_GDS_SHIFT;
- gds_size = gds_size >> AMDGPU_GDS_SHIFT;
-
- gws_base = gws_base >> AMDGPU_GWS_SHIFT;
- gws_size = gws_size >> AMDGPU_GWS_SHIFT;
-
- oa_base = oa_base >> AMDGPU_OA_SHIFT;
- oa_size = oa_size >> AMDGPU_OA_SHIFT;
-
/* GDS Base */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
@@ -4212,6 +4203,18 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
+static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32(mmSQ_CMD, value);
+}
+
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
WREG32(mmSQ_IND_INDEX,
@@ -4513,18 +4516,18 @@ static int gfx_v7_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185,
&adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -4579,25 +4582,6 @@ static int gfx_v7_0_sw_init(void *handle)
}
}
- /* reserve GDS, GWS and OA resource for gfx */
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
- &adev->gds.gds_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
- &adev->gds.gws_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
- &adev->gds.oa_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
adev->gfx.ce_ram_size = 0x8000;
gfx_v7_0_gpu_early_init(adev);
@@ -4640,7 +4624,7 @@ static int gfx_v7_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gfx_v7_0_gpu_init(adev);
+ gfx_v7_0_constants_init(adev);
/* init rlc */
r = gfx_v7_0_rlc_resume(adev);
@@ -5088,6 +5072,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .soft_recovery = gfx_v7_0_ring_soft_recovery,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
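
The new gfx_v7_0_ring_soft_recovery() builds the SQ_CMD value with REG_SET_FIELD, packing the command, mode, VMID check and VMID into a single register write that targets the waves belonging to that VMID so a hung job can be recovered without a full GPU reset. A generic, self-contained sketch of that mask-and-shift field packing; the field layout below is invented for illustration and does not match the real SQ_CMD register:

#include <stdint.h>
#include <stdio.h>

/* Invented layout: [1:0] CMD, [3:2] MODE, [4] CHECK_VMID, [8:5] VM_ID */
#define FIELD_SET(reg, shift, mask, val) \
        (((reg) & ~((uint32_t)(mask) << (shift))) | (((uint32_t)(val) & (mask)) << (shift)))

int main(void)
{
        uint32_t value = 0;

        value = FIELD_SET(value, 0, 0x3, 0x3);  /* CMD        */
        value = FIELD_SET(value, 2, 0x3, 0x1);  /* MODE       */
        value = FIELD_SET(value, 4, 0x1, 1);    /* CHECK_VMID */
        value = FIELD_SET(value, 5, 0xf, 8);    /* VM_ID      */

        printf("SQ_CMD-style register value: 0x%08x\n", value);
        return 0;
}
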
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 818874b13c99..3d0f277a6523 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -51,6 +51,8 @@
#include "smu/smu_7_1_3_d.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 2048
@@ -704,6 +706,17 @@ static const u32 stoney_mgcg_cgcg_init[] =
mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
+
+static const char * const sq_edc_source_names[] = {
+ "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
+ "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
+ "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
+ "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
+ "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
+ "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
+ "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
+};
+
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
@@ -866,26 +879,32 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
- uint32_t tmp = 0;
+
+ unsigned int index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
+ r = amdgpu_device_wb_get(adev, &index);
if (r) {
- DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
}
- WREG32(scratch, 0xCAFEDEAD);
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}
- ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
- ib.ptr[2] = 0xDEADBEEF;
- ib.length_dw = 3;
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
@@ -900,20 +919,21 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
- tmp = RREG32(scratch);
+
+ tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF) {
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
r = 0;
} else {
- DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
+ DRM_ERROR("ib test on ring %d failed\n", ring->idx);
r = -EINVAL;
}
+
err2:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err1:
- amdgpu_gfx_scratch_free(adev, scratch);
+ amdgpu_device_wb_free(adev, index);
return r;
}
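
The IB test above stops consuming a scratch register: it takes a write-back (WB) slot, seeds it with 0xCAFEDEAD, lets the IB WRITE_DATA 0xDEADBEEF to the slot's GPU address, and then checks the CPU-visible copy. A stripped-down model of that check; the device side is simulated here, and the real code waits on the IB's fence before reading back:

#include <stdint.h>
#include <stdio.h>

#define SEED   0xCAFEDEADu
#define MAGIC  0xDEADBEEFu

/* Stand-in for the CPU mapping of the write-back page. */
static uint32_t wb[256];

/* Simulates the GPU executing WRITE_DATA to the WB slot. */
static void fake_gpu_write(unsigned int index, uint32_t value)
{
        wb[index] = value;
}

static int ib_test(unsigned int index)
{
        wb[index] = SEED;                 /* known value before submission */
        fake_gpu_write(index, MAGIC);     /* the IB would do this via WRITE_DATA */
        return wb[index] == MAGIC ? 0 : -1;
}

int main(void)
{
        int r = ib_test(7);

        printf("ib test %s\n", r ? "failed" : "succeeded");
        return r;
}
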
@@ -1094,14 +1114,14 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
@@ -1153,64 +1173,61 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
}
}
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
- info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
- info->fw = adev->gfx.pfp_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
- info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
- info->fw = adev->gfx.me_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
- info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
- info->fw = adev->gfx.ce_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
+ info->fw = adev->gfx.pfp_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
+ info->fw = adev->gfx.me_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
+ info->fw = adev->gfx.ce_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
+ info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
+ info->fw = adev->gfx.rlc_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
+ info->fw = adev->gfx.mec_fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
+ /* we need account JT in */
+ cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
- info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
- info->fw = adev->gfx.rlc_fw;
- header = (const struct common_firmware_header *)info->fw->data;
+ if (amdgpu_sriov_vf(adev)) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
+ info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
+ info->fw = adev->gfx.mec_fw;
adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+ ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
+ }
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
- info->fw = adev->gfx.mec_fw;
+ if (adev->gfx.mec2_fw) {
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
+ info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
+ info->fw = adev->gfx.mec2_fw;
header = (const struct common_firmware_header *)info->fw->data;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
-
- /* we need account JT in */
- cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
-
- if (amdgpu_sriov_vf(adev)) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
- info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
- info->fw = adev->gfx.mec_fw;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
- }
-
- if (adev->gfx.mec2_fw) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
- info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
- info->fw = adev->gfx.mec2_fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
-
}
out:
@@ -1999,6 +2016,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
return 0;
}
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
+
static int gfx_v8_0_sw_init(void *handle)
{
int i, j, k, r, ring_id;
@@ -2026,28 +2045,39 @@ static int gfx_v8_0_sw_init(void *handle)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
- /* KIQ event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
- if (r)
- return r;
-
/* EOP Event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
return r;
+ /* Add CP EDC/ECC irq */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
+ &adev->gfx.cp_ecc_error_irq);
+ if (r)
+ return r;
+
+ /* SQ interrupts. */
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
+ &adev->gfx.sq_irq);
+ if (r) {
+ DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
+ return r;
+ }
+
+ INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
+
adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
gfx_v8_0_scratch_init(adev);
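
The new SQ interrupt source is paired with a work item (INIT_WORK on gfx.sq_work.work), so the hard interrupt handler can record the event and defer the heavier decoding to process context. A compile-oriented sketch of that general workqueue deferral pattern; the structure and function names below are illustrative, not the driver's:

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct sq_deferred {
        struct work_struct work;
        unsigned int ih_data;             /* payload captured in the IRQ path */
};

void sq_deferred_work_func(struct work_struct *work)
{
        struct sq_deferred *sq = container_of(work, struct sq_deferred, work);

        /* Process context: sleeping, locking and verbose logging are allowed. */
        pr_debug("deferred SQ interrupt data 0x%x\n", sq->ih_data);
}

void sq_deferred_init(struct sq_deferred *sq)
{
        INIT_WORK(&sq->work, sq_deferred_work_func);
}

/* Called from the interrupt handler: stash the data and punt. */
void sq_deferred_queue(struct sq_deferred *sq, unsigned int ih_data)
{
        sq->ih_data = ih_data;
        schedule_work(&sq->work);
}
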
@@ -2123,25 +2153,6 @@ static int gfx_v8_0_sw_init(void *handle)
if (r)
return r;
- /* reserve GDS, GWS and OA resource for gfx */
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
- &adev->gds.gds_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
- &adev->gds.gws_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
- &adev->gds.oa_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
adev->gfx.ce_ram_size = 0x8000;
r = gfx_v8_0_gpu_early_init(adev);
@@ -3816,7 +3827,7 @@ static void gfx_v8_0_config_init(struct amdgpu_device *adev)
}
}
-static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
+static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
u32 tmp, sh_static_mem_cfg;
int i;
@@ -4162,65 +4173,11 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
udelay(50);
}
-static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
-{
- const struct rlc_firmware_header_v2_0 *hdr;
- const __le32 *fw_data;
- unsigned i, fw_size;
-
- if (!adev->gfx.rlc_fw)
- return -EINVAL;
-
- hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
- amdgpu_ucode_print_rlc_hdr(&hdr->header);
-
- fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
-
- WREG32(mmRLC_GPM_UCODE_ADDR, 0);
- for (i = 0; i < fw_size; i++)
- WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
- WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
-
- return 0;
-}
-
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
- int r;
- u32 tmp;
-
gfx_v8_0_rlc_stop(adev);
-
- /* disable CG */
- tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
- tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
- RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
- WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
- if (adev->asic_type == CHIP_POLARIS11 ||
- adev->asic_type == CHIP_POLARIS10 ||
- adev->asic_type == CHIP_POLARIS12 ||
- adev->asic_type == CHIP_VEGAM) {
- tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
- tmp &= ~0x3;
- WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
- }
-
- /* disable PG */
- WREG32(mmRLC_PG_CNTL, 0);
-
gfx_v8_0_rlc_reset(adev);
gfx_v8_0_init_pg(adev);
-
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- /* legacy rlc firmware loading */
- r = gfx_v8_0_rlc_load_microcode(adev);
- if (r)
- return r;
- }
-
gfx_v8_0_rlc_start(adev);
return 0;
@@ -4246,63 +4203,6 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
udelay(50);
}
-static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
-{
- const struct gfx_firmware_header_v1_0 *pfp_hdr;
- const struct gfx_firmware_header_v1_0 *ce_hdr;
- const struct gfx_firmware_header_v1_0 *me_hdr;
- const __le32 *fw_data;
- unsigned i, fw_size;
-
- if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
- return -EINVAL;
-
- pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.pfp_fw->data;
- ce_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.ce_fw->data;
- me_hdr = (const struct gfx_firmware_header_v1_0 *)
- adev->gfx.me_fw->data;
-
- amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
- amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
- amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
-
- gfx_v8_0_cp_gfx_enable(adev, false);
-
- /* PFP */
- fw_data = (const __le32 *)
- (adev->gfx.pfp_fw->data +
- le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
- fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
- WREG32(mmCP_PFP_UCODE_ADDR, 0);
- for (i = 0; i < fw_size; i++)
- WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
- WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
-
- /* CE */
- fw_data = (const __le32 *)
- (adev->gfx.ce_fw->data +
- le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
- fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
- WREG32(mmCP_CE_UCODE_ADDR, 0);
- for (i = 0; i < fw_size; i++)
- WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
- WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
-
- /* ME */
- fw_data = (const __le32 *)
- (adev->gfx.me_fw->data +
- le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
- fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
- WREG32(mmCP_ME_RAM_WADDR, 0);
- for (i = 0; i < fw_size; i++)
- WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
- WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
-
- return 0;
-}
-
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
u32 count = 0;
@@ -4502,52 +4402,6 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
udelay(50);
}
-static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
-{
- const struct gfx_firmware_header_v1_0 *mec_hdr;
- const __le32 *fw_data;
- unsigned i, fw_size;
-
- if (!adev->gfx.mec_fw)
- return -EINVAL;
-
- gfx_v8_0_cp_compute_enable(adev, false);
-
- mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
-
- fw_data = (const __le32 *)
- (adev->gfx.mec_fw->data +
- le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
- fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
-
- /* MEC1 */
- WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
- for (i = 0; i < fw_size; i++)
- WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
- WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
-
- /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
- if (adev->gfx.mec2_fw) {
- const struct gfx_firmware_header_v1_0 *mec2_hdr;
-
- mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
- amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
-
- fw_data = (const __le32 *)
- (adev->gfx.mec2_fw->data +
- le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
- fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
-
- WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
- for (i = 0; i < fw_size; i++)
- WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
- WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
- }
-
- return 0;
-}
-
/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
@@ -4566,7 +4420,6 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- uint32_t scratch, tmp = 0;
uint64_t queue_mask = 0;
int r, i;
@@ -4585,17 +4438,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
queue_mask |= (1ull << i);
}
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("Failed to get scratch reg (%d).\n", r);
- return r;
- }
- WREG32(scratch, 0xCAFEDEAD);
-
- r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
+ r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/* set resources */
@@ -4627,25 +4472,12 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
- /* write to scratch for completion */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
- amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
- amdgpu_ring_commit(kiq_ring);
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
- if (i >= adev->usec_timeout) {
- DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
- r = -EINVAL;
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ DRM_ERROR("KCQ enable failed\n");
+ kiq_ring->ready = false;
}
- amdgpu_gfx_scratch_free(adev, scratch);
-
return r;
}
@@ -4895,7 +4727,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
struct vi_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
+ if (!adev->in_gpu_reset && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -4932,26 +4764,33 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
-
- gfx_v8_0_cp_compute_enable(adev, true);
+ struct amdgpu_ring *ring;
+ int r;
ring = &adev->gfx.kiq.ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
- goto done;
+ return r;
r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
- if (!r) {
- r = gfx_v8_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
+ if (unlikely(r != 0))
+ return r;
+
+ gfx_v8_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
- if (r)
- goto done;
+ ring->ready = true;
+ return 0;
+}
+
+static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v8_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
@@ -4976,15 +4815,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
if (r)
goto done;
- /* Test KIQ */
- ring = &adev->gfx.kiq.ring;
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r) {
- ring->ready = false;
- goto done;
- }
-
/* Test KCQs */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
@@ -5005,25 +4835,17 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
if (!(adev->flags & AMD_IS_APU))
gfx_v8_0_enable_gui_idle_interrupt(adev, false);
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- /* legacy firmware loading */
- r = gfx_v8_0_cp_gfx_load_microcode(adev);
- if (r)
- return r;
-
- r = gfx_v8_0_cp_compute_load_microcode(adev);
- if (r)
- return r;
- }
+ r = gfx_v8_0_kiq_resume(adev);
+ if (r)
+ return r;
r = gfx_v8_0_cp_gfx_resume(adev);
if (r)
return r;
- r = gfx_v8_0_kiq_resume(adev);
+ r = gfx_v8_0_kcq_resume(adev);
if (r)
return r;
-
gfx_v8_0_enable_gui_idle_interrupt(adev, true);
return 0;
@@ -5041,7 +4863,7 @@ static int gfx_v8_0_hw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
gfx_v8_0_init_golden_registers(adev);
- gfx_v8_0_gpu_init(adev);
+ gfx_v8_0_constants_init(adev);
r = gfx_v8_0_rlc_resume(adev);
if (r)
@@ -5052,113 +4874,74 @@ static int gfx_v8_0_hw_init(void *handle)
return r;
}
-static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
+static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = kiq_ring->adev;
- uint32_t scratch, tmp = 0;
int r, i;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("Failed to get scratch reg (%d).\n", r);
- return r;
- }
- WREG32(scratch, 0xCAFEDEAD);
-
- r = amdgpu_ring_alloc(kiq_ring, 10);
- if (r) {
+ r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
+ if (r)
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- amdgpu_gfx_scratch_free(adev, scratch);
- return r;
- }
- /* unmap queues */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
- amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- /* write to scratch for completion */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
- amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
- amdgpu_ring_commit(kiq_ring);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
- if (i >= adev->usec_timeout) {
- DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
- r = -EINVAL;
+ amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
}
- amdgpu_gfx_scratch_free(adev, scratch);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ DRM_ERROR("KCQ disable failed\n");
+
return r;
}
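The ring-space request in the reworked disable path follows directly from the packet layout in the loop above: each queue is torn down with one PACKET3_UNMAP_QUEUES submission of six dwords (one header plus five payload dwords), and the scratch-register handshake is gone, so only 6 * num_compute_rings dwords are needed. A minimal standalone sketch of that accounting (the constant and function names are illustrative, not driver symbols):

#include <stdio.h>

/* One PACKET3_UNMAP_QUEUES submission: 1 header dword + 5 payload dwords. */
#define UNMAP_QUEUES_DWORDS 6

static unsigned int kcq_unmap_ring_space(unsigned int num_compute_rings)
{
	/* no extra dwords: completion is now checked by a separate ring test */
	return UNMAP_QUEUES_DWORDS * num_compute_rings;
}

int main(void)
{
	printf("%u dwords for 8 compute rings\n", kcq_unmap_ring_space(8));
	return 0;
}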
-static int gfx_v8_0_hw_fini(void *handle)
+static bool gfx_v8_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i;
-
- amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
- amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
-
- /* disable KCQ to avoid CPC touch memory not valid anymore */
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
- if (amdgpu_sriov_vf(adev)) {
- pr_debug("For SRIOV client, shouldn't do anything.\n");
- return 0;
- }
- gfx_v8_0_cp_enable(adev, false);
- gfx_v8_0_rlc_stop(adev);
-
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_PG_STATE_UNGATE);
-
- return 0;
+ if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
+ || RREG32(mmGRBM_STATUS2) != 0x8)
+ return false;
+ else
+ return true;
}
-static int gfx_v8_0_suspend(void *handle)
+static bool gfx_v8_0_rlc_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->gfx.in_suspend = true;
- return gfx_v8_0_hw_fini(adev);
-}
-static int gfx_v8_0_resume(void *handle)
-{
- int r;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- r = gfx_v8_0_hw_init(adev);
- adev->gfx.in_suspend = false;
- return r;
+ if (RREG32(mmGRBM_STATUS2) != 0x8)
+ return false;
+ else
+ return true;
}
-static bool gfx_v8_0_is_idle(void *handle)
+static int gfx_v8_0_wait_for_rlc_idle(void *handle)
{
+ unsigned int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
- return false;
- else
- return true;
+ for (i = 0; i < adev->usec_timeout; i++) {
+ if (gfx_v8_0_rlc_is_idle(handle))
+ return 0;
+
+ udelay(1);
+ }
+ return -ETIMEDOUT;
}
static int gfx_v8_0_wait_for_idle(void *handle)
{
- unsigned i;
+ unsigned int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (i = 0; i < adev->usec_timeout; i++) {
@@ -5170,6 +4953,47 @@ static int gfx_v8_0_wait_for_idle(void *handle)
return -ETIMEDOUT;
}
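The idle checks above all use the same poll-with-timeout idiom: sample a status predicate once per microsecond until it reports idle or adev->usec_timeout expires. A hedged, generic sketch of that shape (kernel-style C; the helper name and callback are illustrative, not amdgpu API):

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

/*
 * Poll a caller-supplied idle predicate once per microsecond until it
 * reports idle or the timeout (in microseconds) expires.
 */
static int wait_for_idle_us(bool (*is_idle)(void *ctx), void *ctx,
			    unsigned int timeout_us)
{
	unsigned int i;

	for (i = 0; i < timeout_us; i++) {
		if (is_idle(ctx))
			return 0;	/* became idle in time */
		udelay(1);		/* short busy-wait between samples */
	}
	return -ETIMEDOUT;		/* caller decides how to degrade */
}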
+static int gfx_v8_0_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+
+ amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
+
+ /* disable KCQ to keep CPC from touching memory that is no longer valid */
+ gfx_v8_0_kcq_disable(adev);
+
+ if (amdgpu_sriov_vf(adev)) {
+ pr_debug("For SRIOV client, shouldn't do anything.\n");
+ return 0;
+ }
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
+ if (!gfx_v8_0_wait_for_idle(adev))
+ gfx_v8_0_cp_enable(adev, false);
+ else
+ pr_err("cp is busy, skip halt cp\n");
+ if (!gfx_v8_0_wait_for_rlc_idle(adev))
+ gfx_v8_0_rlc_stop(adev);
+ else
+ pr_err("rlc is busy, skip halt rlc\n");
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
+ return 0;
+}
+
+static int gfx_v8_0_suspend(void *handle)
+{
+ return gfx_v8_0_hw_fini(handle);
+}
+
+static int gfx_v8_0_resume(void *handle)
+{
+ return gfx_v8_0_hw_init(handle);
+}
+
static bool gfx_v8_0_check_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -5349,10 +5173,6 @@ static int gfx_v8_0_post_soft_reset(void *handle)
srbm_soft_reset = adev->gfx.srbm_soft_reset;
if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
- REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
- gfx_v8_0_cp_gfx_resume(adev);
-
- if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
@@ -5368,7 +5188,13 @@ static int gfx_v8_0_post_soft_reset(void *handle)
mutex_unlock(&adev->srbm_mutex);
}
gfx_v8_0_kiq_resume(adev);
+ gfx_v8_0_kcq_resume(adev);
}
+
+ if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
+ REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
+ gfx_v8_0_cp_gfx_resume(adev);
+
gfx_v8_0_rlc_start(adev);
return 0;
@@ -5400,15 +5226,6 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
uint32_t gws_base, uint32_t gws_size,
uint32_t oa_base, uint32_t oa_size)
{
- gds_base = gds_base >> AMDGPU_GDS_SHIFT;
- gds_size = gds_size >> AMDGPU_GDS_SHIFT;
-
- gws_base = gws_base >> AMDGPU_GWS_SHIFT;
- gws_size = gws_size >> AMDGPU_GWS_SHIFT;
-
- oa_base = oa_base >> AMDGPU_OA_SHIFT;
- oa_size = oa_size >> AMDGPU_OA_SHIFT;
-
/* GDS Base */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
@@ -5542,9 +5359,19 @@ static int gfx_v8_0_late_init(void *handle)
if (r)
return r;
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_GFX,
- AMD_PG_STATE_GATE);
+ r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ if (r) {
+ DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
+ return r;
+ }
+
+ r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
+ if (r) {
+ DRM_ERROR(
+ "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
+ r);
+ return r;
+ }
return 0;
}
@@ -5552,14 +5379,12 @@ static int gfx_v8_0_late_init(void *handle)
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
bool enable)
{
- if ((adev->asic_type == CHIP_POLARIS11) ||
+ if (((adev->asic_type == CHIP_POLARIS11) ||
(adev->asic_type == CHIP_POLARIS12) ||
- (adev->asic_type == CHIP_VEGAM))
+ (adev->asic_type == CHIP_VEGAM)) &&
+ adev->powerplay.pp_funcs->set_powergating_by_smu)
/* Send msg to SMU via Powerplay */
- amdgpu_device_ip_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_SMC,
- enable ?
- AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
@@ -5614,6 +5439,11 @@ static int gfx_v8_0_set_powergating_state(void *handle,
if (amdgpu_sriov_vf(adev))
return 0;
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GFX_DMG))
+ adev->gfx.rlc.funcs->enter_safe_mode(adev);
switch (adev->asic_type) {
case CHIP_CARRIZO:
case CHIP_STONEY:
@@ -5663,7 +5493,11 @@ static int gfx_v8_0_set_powergating_state(void *handle,
default:
break;
}
-
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GFX_DMG))
+ adev->gfx.rlc.funcs->exit_safe_mode(adev);
return 0;
}
@@ -6668,6 +6502,18 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
amdgpu_ring_write(ring, val);
}
+static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32(mmSQ_CMD, value);
+}
+
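gfx_v8_0_ring_soft_recovery() builds the SQ_CMD value purely with REG_SET_FIELD; that macro is a read-modify-write of a masked, shifted field. A self-contained sketch of the packing with made-up mask and shift constants (not the real SQ_CMD bit layout):

#include <stdint.h>
#include <stdio.h>

/* Illustrative field layout only, not the actual SQ_CMD register. */
#define CMD_FIELD_MASK  0x00000070u
#define CMD_FIELD_SHIFT 4

/* Same effect as REG_SET_FIELD(): clear the field, then OR in the new value. */
static uint32_t set_field(uint32_t reg, uint32_t mask, unsigned int shift,
			  uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

int main(void)
{
	uint32_t value = 0;

	value = set_field(value, CMD_FIELD_MASK, CMD_FIELD_SHIFT, 0x3);
	printf("0x%08x\n", value);	/* prints 0x00000030 */
	return 0;
}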
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -6787,6 +6633,77 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
return 0;
}
+static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ int enable_flag;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ enable_flag = 0;
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ enable_flag = 1;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+ WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
+ enable_flag);
+
+ return 0;
+}
+
+static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ unsigned int type,
+ enum amdgpu_interrupt_state state)
+{
+ int enable_flag;
+
+ switch (state) {
+ case AMDGPU_IRQ_STATE_DISABLE:
+ enable_flag = 1;
+ break;
+
+ case AMDGPU_IRQ_STATE_ENABLE:
+ enable_flag = 0;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
+ enable_flag);
+
+ return 0;
+}
+
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -6837,49 +6754,111 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src,
- unsigned int type,
- enum amdgpu_interrupt_state state)
+static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
{
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
+ DRM_ERROR("CP EDC/ECC error detected.\n");
+ return 0;
+}
- switch (type) {
- case AMDGPU_CP_KIQ_IRQ_DRIVER0:
- WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
- if (ring->me == 1)
- WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
- ring->pipe,
- GENERIC2_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
- else
- WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
- ring->pipe,
- GENERIC2_INT_ENABLE,
- state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
- break;
- default:
- BUG(); /* kiq only support GENERIC2_INT now */
- break;
+static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
+{
+ u32 enc, se_id, sh_id, cu_id;
+ char type[20];
+ int sq_edc_source = -1;
+
+ enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
+ se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
+
+ switch (enc) {
+ case 0:
+ DRM_INFO("SQ general purpose intr detected: "
+ "se_id %d, immed_overflow %d, host_reg_overflow %d, "
+ "host_cmd_overflow %d, cmd_timestamp %d, "
+ "reg_timestamp %d, thread_trace_buff_full %d, "
+ "wlt %d, thread_trace %d.\n",
+ se_id,
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
+ );
+ break;
+ case 1:
+ case 2:
+
+ cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
+ sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
+
+ /*
+ * This function can be called either directly from the ISR
+ * or from the bottom half; only in the latter case can we
+ * also read the SQ_EDC_INFO instance.
+ */
+ if (in_task()) {
+ mutex_lock(&adev->grbm_idx_mutex);
+ gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
+
+ sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
+
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ }
+
+ if (enc == 1)
+ sprintf(type, "instruction intr");
+ else
+ sprintf(type, "EDC/ECC error");
+
+ DRM_INFO(
+ "SQ %s detected: "
+ "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
+ "trap %s, sq_ed_info.source %s.\n",
+ type, se_id, sh_id, cu_id,
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
+ REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
+ (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
+ );
+ break;
+ default:
+ DRM_ERROR("SQ invalid encoding type.\n");
}
- return 0;
}
-static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
+static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
- u8 me_id, pipe_id, queue_id;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
- me_id = (entry->ring_id & 0x0c) >> 2;
- pipe_id = (entry->ring_id & 0x03) >> 0;
- queue_id = (entry->ring_id & 0x70) >> 4;
- DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
- me_id, pipe_id, queue_id);
+ struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
+ struct sq_work *sq_work = container_of(work, struct sq_work, work);
+
+ gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
+}
+
+static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+{
+ unsigned ih_data = entry->src_data[0];
+
+ /*
+ * Try to submit work so SQ_EDC_INFO can be accessed from
+ * BH. If previous work submission hasn't finished yet
+ * just print whatever info is possible directly from the ISR.
+ */
+ if (work_pending(&adev->gfx.sq_work.work)) {
+ gfx_v8_0_parse_sq_irq(adev, ih_data);
+ } else {
+ adev->gfx.sq_work.ih_data = ih_data;
+ schedule_work(&adev->gfx.sq_work.work);
+ }
- amdgpu_fence_process(ring);
return 0;
}
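The SQ interrupt path above captures the IH payload in the ISR and defers the slow SQ_EDC_INFO read to a work item, falling back to in-ISR parsing only when the previous work item has not run yet. A stripped-down sketch of that defer-to-workqueue pattern (kernel-style C; the struct, handler, and parse function names are placeholders, not amdgpu symbols):

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct sq_irq_work {
	struct work_struct work;
	unsigned int ih_data;		/* payload captured in the ISR */
};

static struct sq_irq_work sq_irq_work;

/* Slow parse; in the driver this is where SQ_EDC_INFO may be read. */
static void parse_ih_data(unsigned int ih_data)
{
}

static void sq_irq_work_func(struct work_struct *work)
{
	struct sq_irq_work *w = container_of(work, struct sq_irq_work, work);

	parse_ih_data(w->ih_data);	/* runs in process context */
}

static void sq_irq_work_init(void)
{
	INIT_WORK(&sq_irq_work.work, sq_irq_work_func);
}

/* Called from the interrupt handler with the raw IH payload. */
static void sq_irq_handle(unsigned int ih_data)
{
	if (work_pending(&sq_irq_work.work)) {
		/* previous payload not consumed yet: degrade gracefully */
		parse_ih_data(ih_data);
	} else {
		sq_irq_work.ih_data = ih_data;
		schedule_work(&sq_irq_work.work);
	}
}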
@@ -6946,6 +6925,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
+ .soft_recovery = gfx_v8_0_ring_soft_recovery,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -7032,9 +7012,14 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
.process = gfx_v8_0_priv_inst_irq,
};
-static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
- .set = gfx_v8_0_kiq_set_interrupt_state,
- .process = gfx_v8_0_kiq_irq,
+static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
+ .set = gfx_v8_0_set_cp_ecc_int_state,
+ .process = gfx_v8_0_cp_ecc_error_irq,
+};
+
+static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
+ .set = gfx_v8_0_set_sq_int_state,
+ .process = gfx_v8_0_sq_irq,
};
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
@@ -7048,8 +7033,11 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
- adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
- adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
+ adev->gfx.cp_ecc_error_irq.num_types = 1;
+ adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
+
+ adev->gfx.sq_irq.num_types = 1;
+ adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a69153435ea7..6d7baf59d6e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -38,6 +38,8 @@
#include "clearstate_gfx9.h"
#include "v9_structs.h"
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 2048
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@@ -78,9 +80,24 @@ MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
+MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
+MODULE_FIRMWARE("amdgpu/picasso_me.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
+MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
+
+MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
+MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
+MODULE_FIRMWARE("amdgpu/raven2_me.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
+MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
+MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
+
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
@@ -102,11 +119,25 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
@@ -146,7 +177,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
@@ -160,6 +194,29 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};
+static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
+};
+
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
@@ -197,7 +254,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
@@ -227,6 +287,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
+#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -266,12 +327,16 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
ARRAY_SIZE(golden_settings_gc_9_0_vg20));
break;
case CHIP_RAVEN:
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_1,
- ARRAY_SIZE(golden_settings_gc_9_1));
- soc15_program_register_sequence(adev,
- golden_settings_gc_9_1_rv1,
- ARRAY_SIZE(golden_settings_gc_9_1_rv1));
+ soc15_program_register_sequence(adev, golden_settings_gc_9_1,
+ ARRAY_SIZE(golden_settings_gc_9_1));
+ if (adev->rev_id >= 8)
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_1_rv2,
+ ARRAY_SIZE(golden_settings_gc_9_1_rv2));
+ else
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_1_rv1,
+ ARRAY_SIZE(golden_settings_gc_9_1_rv1));
break;
default:
break;
@@ -469,6 +534,61 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}
+static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.me_fw_write_wait = false;
+ adev->gfx.mec_fw_write_wait = false;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 42) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b1) &&
+ (adev->gfx.pfp_feature_version >= 42))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000193) &&
+ (adev->gfx.mec_feature_version >= 42))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ case CHIP_VEGA12:
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 44) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b2) &&
+ (adev->gfx.pfp_feature_version >= 44))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000196) &&
+ (adev->gfx.mec_feature_version >= 44))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ case CHIP_VEGA20:
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 44) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b2) &&
+ (adev->gfx.pfp_feature_version >= 44))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000197) &&
+ (adev->gfx.mec_feature_version >= 44))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ case CHIP_RAVEN:
+ if ((adev->gfx.me_fw_version >= 0x0000009c) &&
+ (adev->gfx.me_feature_version >= 42) &&
+ (adev->gfx.pfp_fw_version >= 0x000000b1) &&
+ (adev->gfx.pfp_feature_version >= 42))
+ adev->gfx.me_fw_write_wait = true;
+
+ if ((adev->gfx.mec_fw_version >= 0x00000192) &&
+ (adev->gfx.mec_feature_version >= 42))
+ adev->gfx.mec_fw_write_wait = true;
+ break;
+ default:
+ break;
+ }
+}
+
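gfx_v9_0_check_fw_write_wait() enables the register write-and-wait fast path only when the ME/PFP (and MEC) firmware and feature versions meet per-ASIC minimums. The same gate can be expressed as a small lookup table; a hedged standalone sketch (the thresholds mirror the VEGA10 and VEGA12 cases above, while the asic ids and names are illustrative):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct fw_min {
	int asic;			/* illustrative ASIC id */
	uint32_t me_fw, me_feat;	/* minimum ME firmware/feature version */
	uint32_t pfp_fw, pfp_feat;	/* minimum PFP firmware/feature version */
};

static const struct fw_min fw_min_tbl[] = {
	{ .asic = 0, .me_fw = 0x9c, .me_feat = 42, .pfp_fw = 0xb1, .pfp_feat = 42 },
	{ .asic = 1, .me_fw = 0x9c, .me_feat = 44, .pfp_fw = 0xb2, .pfp_feat = 44 },
};

static bool me_write_wait_supported(int asic, uint32_t me_fw, uint32_t me_feat,
				    uint32_t pfp_fw, uint32_t pfp_feat)
{
	size_t i;

	for (i = 0; i < sizeof(fw_min_tbl) / sizeof(fw_min_tbl[0]); i++) {
		const struct fw_min *m = &fw_min_tbl[i];

		if (m->asic != asic)
			continue;
		return me_fw >= m->me_fw && me_feat >= m->me_feat &&
		       pfp_fw >= m->pfp_fw && pfp_feat >= m->pfp_feat;
	}
	return false;	/* unknown ASIC: keep the conservative path */
}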
static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
{
const char *chip_name;
@@ -496,7 +616,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
chip_name = "vega20";
break;
case CHIP_RAVEN:
- chip_name = "raven";
+ if (adev->rev_id >= 8)
+ chip_name = "raven2";
+ else if (adev->pdev->device == 0x15d8)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
break;
default:
BUG();
@@ -577,14 +702,14 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
tmp = (unsigned int *)((uintptr_t)rlc_hdr +
le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
- for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
+ for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
if (adev->gfx.rlc.is_rlc_v2_1)
@@ -648,7 +773,10 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- if (adev->gfx.rlc.is_rlc_v2_1) {
+ if (adev->gfx.rlc.is_rlc_v2_1 &&
+ adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
+ adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
+ adev->gfx.rlc.save_restore_list_srm_size_bytes) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
info->fw = adev->gfx.rlc_fw;
@@ -700,6 +828,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
}
out:
+ gfx_v9_0_check_fw_write_wait(adev);
if (err) {
dev_err(adev->dev,
"gfx9: Failed to load firmware \"%s\"\n",
@@ -789,6 +918,50 @@ static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
buffer[count++] = cpu_to_le32(0);
}
+static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
+{
+ struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ uint32_t pg_always_on_cu_num = 2;
+ uint32_t always_on_cu_num;
+ uint32_t i, j, k;
+ uint32_t mask, cu_bitmap, counter;
+
+ if (adev->flags & AMD_IS_APU)
+ always_on_cu_num = 4;
+ else if (adev->asic_type == CHIP_VEGA12)
+ always_on_cu_num = 8;
+ else
+ always_on_cu_num = 12;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+ mask = 1;
+ cu_bitmap = 0;
+ counter = 0;
+ gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+
+ for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ if (cu_info->bitmap[i][j] & mask) {
+ if (counter == pg_always_on_cu_num)
+ WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
+ if (counter < always_on_cu_num)
+ cu_bitmap |= mask;
+ else
+ break;
+ counter++;
+ }
+ mask <<= 1;
+ }
+
+ WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
+ cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
+ }
+ }
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+}
+
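gfx_v9_0_init_always_on_cu_mask() walks each SE/SH CU bitmap and keeps only the first always_on_cu_num CUs that are actually present. The core bitmap walk, isolated as a standalone sketch with the register writes and GRBM index selection omitted (function and parameter names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Collect the first 'always_on' CUs that are set in 'cu_bitmap'. */
static uint32_t first_n_active_cus(uint32_t cu_bitmap, unsigned int max_cu,
				   unsigned int always_on)
{
	uint32_t mask = 1, out = 0;
	unsigned int k, counter = 0;

	for (k = 0; k < max_cu; k++, mask <<= 1) {
		if (!(cu_bitmap & mask))
			continue;	/* CU not present (harvested) */
		if (counter >= always_on)
			break;		/* enough always-on CUs collected */
		out |= mask;
		counter++;
	}
	return out;
}

int main(void)
{
	/* keep the first 4 active CUs of an example bitmap */
	printf("0x%08x\n", first_n_active_cus(0x7fb, 16, 4));
	return 0;
}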
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
uint32_t data;
@@ -822,8 +995,10 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
data |= 0x00C00000;
WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
- /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */
- WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF);
+ /*
+ * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
+ * programmed in gfx_v9_0_init_always_on_cu_mask()
+ */
/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
* but used for RLC_LB_CNTL configuration */
@@ -832,6 +1007,57 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_0_init_always_on_cu_mask(adev);
+}
+
+static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
+{
+ uint32_t data;
+
+ /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
+ WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
+
+ /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
+
+ /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
+
+ /* set mmRLC_LB_PARAMS = 0x003F_1006 */
+ data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
+ data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
+ data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
+ WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
+
+ /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
+ data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
+ data &= 0x0000FFFF;
+ data |= 0x00C00000;
+ WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
+
+ /*
+ * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
+ * programmed in gfx_v9_0_init_always_on_cu_mask()
+ */
+
+ /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
+ * but used for RLC_LB_CNTL configuration */
+ data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
+ data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
+ data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
+ WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ gfx_v9_0_init_always_on_cu_mask(adev);
}
static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
@@ -943,6 +1169,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
dst_ptr = adev->gfx.rlc.cs_ptr;
gfx_v9_0_get_csb_buffer(adev, dst_ptr);
amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
+ amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
@@ -964,13 +1191,55 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
rv_init_cp_jump_table(adev);
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+ }
+ switch (adev->asic_type) {
+ case CHIP_RAVEN:
gfx_v9_0_init_lbpw(adev);
+ break;
+ case CHIP_VEGA20:
+ gfx_v9_4_init_lbpw(adev);
+ break;
+ default:
+ break;
}
return 0;
}
+static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
+{
+ int r;
+
+ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
+ if (unlikely(r != 0))
+ return r;
+
+ r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
+ AMDGPU_GEM_DOMAIN_VRAM);
+ if (!r)
+ adev->gfx.rlc.clear_state_gpu_addr =
+ amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
+
+ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+
+ return r;
+}
+
+static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!adev->gfx.rlc.clear_state_obj)
+ return;
+
+ r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
+ if (likely(r == 0)) {
+ amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
+ }
+}
+
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
@@ -1160,7 +1429,10 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
- gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
+ if (adev->rev_id >= 8)
+ gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
+ else
+ gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
break;
default:
BUG();
@@ -1371,8 +1643,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
gfx_v9_0_write_data_to_reg(ring, 0, false,
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
(adev->gds.mem.total_size +
- adev->gfx.ngg.gds_reserve_size) >>
- AMDGPU_GDS_SHIFT);
+ adev->gfx.ngg.gds_reserve_size));
amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
@@ -1450,24 +1721,19 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.mec.num_pipe_per_mec = 4;
adev->gfx.mec.num_queue_per_pipe = 8;
- /* KIQ event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
- if (r)
- return r;
-
/* EOP Event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
&adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -1545,25 +1811,6 @@ static int gfx_v9_0_sw_init(void *handle)
if (r)
return r;
- /* reserve GDS, GWS and OA resource for gfx */
- r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
- &adev->gds.gds_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
- &adev->gds.gws_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
- r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
- PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
- &adev->gds.oa_gfx_bo, NULL, NULL);
- if (r)
- return r;
-
adev->gfx.ce_ram_size = 0x8000;
r = gfx_v9_0_gpu_early_init(adev);
@@ -1711,7 +1958,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
}
-static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
+static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
{
u32 tmp;
int i;
@@ -2148,8 +2395,16 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad
static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
{
- if (!adev->gfx.rlc.is_rlc_v2_1)
- return;
+ gfx_v9_0_init_csb(adev);
+
+ /*
+ * The RLC save/restore list is available since v2_1
+ * and is required by the gfxoff feature.
+ */
+ if (adev->gfx.rlc.is_rlc_v2_1) {
+ gfx_v9_1_init_rlc_save_restore_list(adev);
+ gfx_v9_0_enable_save_restore_machine(adev);
+ }
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_GFX_SMG |
@@ -2157,10 +2412,6 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_GDS |
AMD_PG_SUPPORT_RLC_SMU_HS)) {
- gfx_v9_0_init_csb(adev);
- gfx_v9_1_init_rlc_save_restore_list(adev);
- gfx_v9_0_enable_save_restore_machine(adev);
-
WREG32(mmRLC_JUMP_TABLE_RESTORE,
adev->gfx.rlc.cp_table_gpu_addr >> 8);
gfx_v9_0_init_gfx_power_gating(adev);
@@ -2252,9 +2503,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
/* disable CG */
WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
- /* disable PG */
- WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0);
-
gfx_v9_0_rlc_reset(adev);
gfx_v9_0_init_pg(adev);
@@ -2266,7 +2514,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
return r;
}
- if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->asic_type == CHIP_RAVEN ||
+ adev->asic_type == CHIP_VEGA20) {
if (amdgpu_lbpw != 0)
gfx_v9_0_enable_lbpw(adev, true);
else
@@ -2559,7 +2808,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- uint32_t scratch, tmp = 0;
uint64_t queue_mask = 0;
int r, i;
@@ -2578,17 +2826,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
queue_mask |= (1ull << i);
}
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("Failed to get scratch reg (%d).\n", r);
- return r;
- }
- WREG32(scratch, 0xCAFEDEAD);
-
- r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11);
+ r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2625,24 +2865,12 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
- /* write to scratch for completion */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
- amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
- amdgpu_ring_commit(kiq_ring);
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
- if (i >= adev->usec_timeout) {
- DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
- r = -EINVAL;
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r) {
+ DRM_ERROR("KCQ enable failed\n");
+ kiq_ring->ready = false;
}
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
@@ -2975,7 +3203,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
struct v9_mqd *mqd = ring->mqd_ptr;
int mqd_idx = ring - &adev->gfx.compute_ring[0];
- if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
+ if (!adev->in_gpu_reset && !adev->in_suspend) {
memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -3004,26 +3232,33 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
{
- struct amdgpu_ring *ring = NULL;
- int r = 0, i;
-
- gfx_v9_0_cp_compute_enable(adev, true);
+ struct amdgpu_ring *ring;
+ int r;
ring = &adev->gfx.kiq.ring;
r = amdgpu_bo_reserve(ring->mqd_obj, false);
if (unlikely(r != 0))
- goto done;
+ return r;
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
- if (!r) {
- r = gfx_v9_0_kiq_init_queue(ring);
- amdgpu_bo_kunmap(ring->mqd_obj);
- ring->mqd_ptr = NULL;
- }
+ if (unlikely(r != 0))
+ return r;
+
+ gfx_v9_0_kiq_init_queue(ring);
+ amdgpu_bo_kunmap(ring->mqd_obj);
+ ring->mqd_ptr = NULL;
amdgpu_bo_unreserve(ring->mqd_obj);
- if (r)
- goto done;
+ ring->ready = true;
+ return 0;
+}
+
+static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = NULL;
+ int r = 0, i;
+
+ gfx_v9_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
@@ -3066,11 +3301,15 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
}
+ r = gfx_v9_0_kiq_resume(adev);
+ if (r)
+ return r;
+
r = gfx_v9_0_cp_gfx_resume(adev);
if (r)
return r;
- r = gfx_v9_0_kiq_resume(adev);
+ r = gfx_v9_0_kcq_resume(adev);
if (r)
return r;
@@ -3081,12 +3320,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
return r;
}
- ring = &adev->gfx.kiq.ring;
- ring->ready = true;
- r = amdgpu_ring_test_ring(ring);
- if (r)
- ring->ready = false;
-
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
@@ -3114,7 +3347,11 @@ static int gfx_v9_0_hw_init(void *handle)
gfx_v9_0_init_golden_registers(adev);
- gfx_v9_0_gpu_init(adev);
+ gfx_v9_0_constants_init(adev);
+
+ r = gfx_v9_0_csb_vram_pin(adev);
+ if (r)
+ return r;
r = gfx_v9_0_rlc_resume(adev);
if (r)
@@ -3131,71 +3368,45 @@ static int gfx_v9_0_hw_init(void *handle)
return r;
}
-static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
+static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = kiq_ring->adev;
- uint32_t scratch, tmp = 0;
int r, i;
+ struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("Failed to get scratch reg (%d).\n", r);
- return r;
- }
- WREG32(scratch, 0xCAFEDEAD);
-
- r = amdgpu_ring_alloc(kiq_ring, 10);
- if (r) {
+ r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
+ if (r)
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
- amdgpu_gfx_scratch_free(adev, scratch);
- return r;
- }
- /* unmap queues */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+ struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+
+ amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+ amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
- amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- /* write to scratch for completion */
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
- amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
- amdgpu_ring_commit(kiq_ring);
-
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
- if (i >= adev->usec_timeout) {
- DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
- r = -EINVAL;
+ amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_write(kiq_ring, 0);
}
- amdgpu_gfx_scratch_free(adev, scratch);
+ r = amdgpu_ring_test_ring(kiq_ring);
+ if (r)
+ DRM_ERROR("KCQ disable failed\n");
+
return r;
}
static int gfx_v9_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i;
-
- amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
- AMD_PG_STATE_UNGATE);
amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
/* disable KCQ to keep CPC from touching memory that is no longer valid */
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
+ gfx_v9_0_kcq_disable(adev);
if (amdgpu_sriov_vf(adev)) {
gfx_v9_0_cp_gfx_enable(adev, false);
@@ -3211,7 +3422,7 @@ static int gfx_v9_0_hw_fini(void *handle)
/* Use deinitialize sequence from CAIL when unbinding device from driver,
* otherwise KIQ is hanging when binding back
*/
- if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
+ if (!adev->in_gpu_reset && !adev->in_suspend) {
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
adev->gfx.kiq.ring.pipe,
@@ -3224,25 +3435,19 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_cp_enable(adev, false);
gfx_v9_0_rlc_stop(adev);
+ gfx_v9_0_csb_vram_unpin(adev);
+
return 0;
}
static int gfx_v9_0_suspend(void *handle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- adev->gfx.in_suspend = true;
- return gfx_v9_0_hw_fini(adev);
+ return gfx_v9_0_hw_fini(handle);
}
static int gfx_v9_0_resume(void *handle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int r;
-
- r = gfx_v9_0_hw_init(adev);
- adev->gfx.in_suspend = false;
- return r;
+ return gfx_v9_0_hw_init(handle);
}
static bool gfx_v9_0_is_idle(void *handle)
@@ -3351,15 +3556,6 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
{
struct amdgpu_device *adev = ring->adev;
- gds_base = gds_base >> AMDGPU_GDS_SHIFT;
- gds_size = gds_size >> AMDGPU_GDS_SHIFT;
-
- gws_base = gws_base >> AMDGPU_GWS_SHIFT;
- gws_size = gws_size >> AMDGPU_GWS_SHIFT;
-
- oa_base = oa_base >> AMDGPU_OA_SHIFT;
- oa_size = oa_size >> AMDGPU_OA_SHIFT;
-
/* GDS Base */
gfx_v9_0_write_data_to_reg(ring, 0, false,
SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
@@ -3433,7 +3629,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
/* wait for RLC_SAFE_MODE */
for (i = 0; i < adev->usec_timeout; i++) {
- if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
+ if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
break;
udelay(1);
}
@@ -3510,8 +3706,11 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
- RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
+
+ if (adev->asic_type != CHIP_VEGA12)
+ data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
+
+ data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
@@ -3541,11 +3740,15 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
} else {
/* 1 - MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
- data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
- RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
+
+ if (adev->asic_type != CHIP_VEGA12)
+ data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
+
+ data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
@@ -3581,9 +3784,11 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
/* update CGCG and CGLS override bits */
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
- /* enable 3Dcgcg FSM(0x0020003f) */
+
+ /* enable 3Dcgcg FSM(0x0000363f) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
- data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+
+ data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
@@ -3630,9 +3835,10 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
- /* enable cgcg FSM(0x0020003F) */
+ /* enable cgcg FSM(0x0000363F) */
def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
- data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+
+ data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
@@ -3696,6 +3902,10 @@ static int gfx_v9_0_set_powergating_state(void *handle,
switch (adev->asic_type) {
case CHIP_RAVEN:
+ if (!enable) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+ }
if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
@@ -3714,6 +3924,17 @@ static int gfx_v9_0_set_powergating_state(void *handle,
/* update mgcg state */
gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
+
+ if (enable)
+ amdgpu_gfx_off_ctrl(adev, true);
+ break;
+ case CHIP_VEGA12:
+ if (!enable) {
+ amdgpu_gfx_off_ctrl(adev, false);
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+ } else {
+ amdgpu_gfx_off_ctrl(adev, true);
+ }
break;
default:
break;
@@ -4274,8 +4495,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
uint32_t ref, uint32_t mask)
{
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
+ adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
- if (amdgpu_sriov_vf(ring->adev))
+ if (fw_version_ok)
gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
ref, mask, 0x20);
else
@@ -4283,6 +4507,18 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask);
}
+static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t value = 0;
+
+ value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+ value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+ value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+ value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+ WREG32(mmSQ_CMD, value);
+}
+
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -4477,68 +4713,6 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
return 0;
}
-static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src,
- unsigned int type,
- enum amdgpu_interrupt_state state)
-{
- uint32_t tmp, target;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
-
- if (ring->me == 1)
- target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
- else
- target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
- target += ring->pipe;
-
- switch (type) {
- case AMDGPU_CP_KIQ_IRQ_DRIVER0:
- if (state == AMDGPU_IRQ_STATE_DISABLE) {
- tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
- tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
- GENERIC2_INT_ENABLE, 0);
- WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
-
- tmp = RREG32(target);
- tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
- GENERIC2_INT_ENABLE, 0);
- WREG32(target, tmp);
- } else {
- tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL);
- tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
- GENERIC2_INT_ENABLE, 1);
- WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp);
-
- tmp = RREG32(target);
- tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
- GENERIC2_INT_ENABLE, 1);
- WREG32(target, tmp);
- }
- break;
- default:
- BUG(); /* kiq only support GENERIC2_INT now */
- break;
- }
- return 0;
-}
-
-static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- u8 me_id, pipe_id, queue_id;
- struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
-
- me_id = (entry->ring_id & 0x0c) >> 2;
- pipe_id = (entry->ring_id & 0x03) >> 0;
- queue_id = (entry->ring_id & 0x70) >> 4;
- DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
- me_id, pipe_id, queue_id);
-
- amdgpu_fence_process(ring);
- return 0;
-}
-
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -4605,6 +4779,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_wreg = gfx_v9_0_ring_emit_wreg,
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+ .soft_recovery = gfx_v9_0_ring_soft_recovery,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4686,11 +4861,6 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}
-static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
- .set = gfx_v9_0_kiq_set_interrupt_state,
- .process = gfx_v9_0_kiq_irq,
-};
-
static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
.set = gfx_v9_0_set_eop_interrupt_state,
.process = gfx_v9_0_eop_irq,
@@ -4716,9 +4886,6 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
adev->gfx.priv_inst_irq.num_types = 1;
adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
-
- adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
- adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
}
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
@@ -4738,7 +4905,20 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
/* init asic gds info */
- adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
+ adev->gds.mem.total_size = 0x10000;
+ break;
+ case CHIP_RAVEN:
+ adev->gds.mem.total_size = 0x1000;
+ break;
+ default:
+ adev->gds.mem.total_size = 0x10000;
+ break;
+ }
+
adev->gds.gws.total_size = 64;
adev->gds.oa.total_size = 16;
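The GDS size is now a fixed per-ASIC constant instead of a readback of mmGDS_VMID0_SIZE. A tiny stand-alone version of the lookup (the enum is a stand-in for amd_asic_type):

    #include <stdint.h>
    #include <stdio.h>

    enum asic { CHIP_VEGA10, CHIP_VEGA12, CHIP_VEGA20, CHIP_RAVEN, CHIP_OTHER };

    static uint32_t gds_total_size(enum asic type)
    {
        switch (type) {
        case CHIP_RAVEN:
            return 0x1000;   /* 4 KB */
        case CHIP_VEGA10:
        case CHIP_VEGA12:
        case CHIP_VEGA20:
        default:
            return 0x10000;  /* 64 KB */
        }
    }

    int main(void)
    {
        printf("raven GDS:  %u bytes\n", gds_total_size(CHIP_RAVEN));
        printf("vega10 GDS: %u bytes\n", gds_total_size(CHIP_VEGA10));
        return 0;
    }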
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index acfbd2d749cf..ceb7847b504f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -37,13 +37,7 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
{
- uint64_t value;
-
- BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
- value = adev->gart.table_addr - adev->gmc.vram_start
- + adev->vm_manager.vram_base_offset;
- value &= 0x0000FFFFFFFFF000ULL;
- value |= 0x1; /*valid bit*/
+ uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
@@ -71,16 +65,28 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
- /* Disable AGP. */
+ /* Program the AGP BAR */
WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0);
- WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF);
+ WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- adev->gmc.vram_start >> 18);
- WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- adev->gmc.vram_end >> 18);
+ min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+ /*
+	 * Raven2 has a HW issue in that it is unable to use the vram
+	 * that lies beyond MC_VM_SYSTEM_APERTURE_HIGH_ADDR. As a
+	 * workaround, increase the system aperture high address (add 1)
+	 * to avoid the VM fault and hardware hang.
+ */
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.vram_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
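With the AGP BAR programmed, the system aperture has to cover both the VRAM and AGP ranges, and Raven2 additionally gets its high bound bumped by one 256KB unit as noted in the comment above. A compilable sketch of the bound computation; the address layout in main() is made up for illustration:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct gmc {
        uint64_t vram_start, vram_end;
        uint64_t agp_start, agp_end;
    };

    #define MAX64(a, b) ((a) > (b) ? (a) : (b))
    #define MIN64(a, b) ((a) < (b) ? (a) : (b))

    static void aperture_bounds(const struct gmc *gmc, bool is_raven2,
                                uint64_t *low, uint64_t *high)
    {
        /* low bound covers whichever of VRAM/AGP starts first (256KB units) */
        *low = MIN64(gmc->vram_start, gmc->agp_start) >> 18;

        if (is_raven2)
            /* Raven2: bump the high bound by one 256KB unit (HW workaround) */
            *high = MAX64((gmc->vram_end >> 18) + 0x1, gmc->agp_end >> 18);
        else
            *high = MAX64(gmc->vram_end, gmc->agp_end) >> 18;
    }

    int main(void)
    {
        /* sample layout, not real board values */
        struct gmc gmc = {
            .vram_start = 0x0000008000000000ULL,
            .vram_end   = 0x00000081ffffffffULL,
            .agp_start  = 0x0000008400000000ULL,
            .agp_end    = 0x00000087ffffffffULL,
        };
        uint64_t low, high;

        aperture_bounds(&gmc, true, &low, &high);
        printf("APERTURE_LOW_ADDR=0x%llx HIGH_ADDR=0x%llx\n",
               (unsigned long long)low, (unsigned long long)high);
        return 0;
    }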
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
new file mode 100644
index 000000000000..5e9ab8eb214a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "gfxhub_v1_1.h"
+
+#include "gc/gc_9_2_1_offset.h"
+#include "gc/gc_9_2_1_sh_mask.h"
+
+#include "soc15_common.h"
+
+int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev)
+{
+ u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL);
+ u32 max_region =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION);
+
+ /* PF_MAX_REGION=0 means xgmi is disabled */
+ if (max_region) {
+ adev->gmc.xgmi.num_physical_nodes = max_region + 1;
+ if (adev->gmc.xgmi.num_physical_nodes > 4)
+ return -EINVAL;
+
+ adev->gmc.xgmi.physical_node_id =
+ REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION);
+ if (adev->gmc.xgmi.physical_node_id > 3)
+ return -EINVAL;
+ adev->gmc.xgmi.node_segment_size = REG_GET_FIELD(
+ RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE),
+ MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24;
+ }
+
+ return 0;
+}
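gfxhub_v1_1_get_xgmi_info() turns two MC_VM_XGMI registers into the hive topology: PF_MAX_REGION+1 is the node count, PF_LFB_REGION is this node's index, and PF_LFB_SIZE is the per-node segment size in 16MB units. A stand-alone decode of a sample register pair; the bit positions below are assumptions for illustration only, the driver uses REG_GET_FIELD with the generated masks:

    #include <stdint.h>
    #include <stdio.h>

    /* assumed field layout for illustration only */
    #define PF_MAX_REGION(cntl)  ((cntl) & 0x7)         /* nodes - 1 */
    #define PF_LFB_REGION(cntl)  (((cntl) >> 3) & 0x7)  /* this node's index */
    #define PF_LFB_SIZE(sz)      ((sz) & 0xFFFF)        /* segment size, 16MB units */

    struct xgmi_info {
        unsigned int num_physical_nodes;
        unsigned int physical_node_id;
        uint64_t node_segment_size;
    };

    static int decode_xgmi(uint32_t lfb_cntl, uint32_t lfb_size, struct xgmi_info *x)
    {
        unsigned int max_region = PF_MAX_REGION(lfb_cntl);

        if (!max_region)            /* PF_MAX_REGION == 0: xGMI disabled */
            return 0;

        x->num_physical_nodes = max_region + 1;
        x->physical_node_id = PF_LFB_REGION(lfb_cntl);
        if (x->num_physical_nodes > 4 || x->physical_node_id > 3)
            return -1;              /* more nodes than the hub supports */

        x->node_segment_size = (uint64_t)PF_LFB_SIZE(lfb_size) << 24;
        return 0;
    }

    int main(void)
    {
        struct xgmi_info x = {0};

        if (!decode_xgmi(0x0000000b, 0x00000800, &x))   /* sample readback */
            printf("nodes=%u id=%u segment=%llu MB\n",
                   x.num_physical_nodes, x.physical_node_id,
                   (unsigned long long)(x.node_segment_size >> 20));
        return 0;
    }

The later gmc_v9_0_vram_gtt_location() hunk consumes exactly these three fields, offsetting the VRAM base by physical_node_id * node_segment_size.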
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h
new file mode 100644
index 000000000000..d753cf28a0a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __GFXHUB_V1_1_H__
+#define __GFXHUB_V1_1_H__
+
+int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 79f9ac29019b..e1c2b4e9c7b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "gmc_v6_0.h"
#include "amdgpu_ucode.h"
+#include "amdgpu_gem.h"
#include "bif/bif_3_0_d.h"
#include "bif/bif_3_0_sh_mask.h"
@@ -41,11 +42,11 @@ static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v6_0_wait_for_idle(void *handle);
-MODULE_FIRMWARE("radeon/tahiti_mc.bin");
-MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
-MODULE_FIRMWARE("radeon/verde_mc.bin");
-MODULE_FIRMWARE("radeon/oland_mc.bin");
-MODULE_FIRMWARE("radeon/si58_mc.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
+MODULE_FIRMWARE("amdgpu/verde_mc.bin");
+MODULE_FIRMWARE("amdgpu/oland_mc.bin");
+MODULE_FIRMWARE("amdgpu/si58_mc.bin");
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
@@ -134,9 +135,9 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
is_58_fw = true;
if (is_58_fw)
- snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin");
else
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
if (err)
goto out;
@@ -223,8 +224,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
- amdgpu_device_vram_location(adev, &adev->gmc, base);
- amdgpu_device_gart_location(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc);
}
static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
@@ -493,16 +494,20 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
{
+ uint64_t table_addr;
int r, i;
u32 field;
- if (adev->gart.robj == NULL) {
+ if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
r = amdgpu_gart_table_vram_pin(adev);
if (r)
return r;
+
+ table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+
/* Setup TLB control */
WREG32(mmMC_VM_MX_L1_TLB_CNTL,
(0xA << 7) |
@@ -531,7 +536,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0);
@@ -555,10 +560,10 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
for (i = 1; i < 16; i++) {
if (i < 8)
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
- adev->gart.table_addr >> 12);
+ table_addr >> 12);
else
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
- adev->gart.table_addr >> 12);
+ table_addr >> 12);
}
/* enable context1-15 */
@@ -578,7 +583,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
gmc_v6_0_flush_gpu_tlb(adev, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
- (unsigned long long)adev->gart.table_addr);
+ (unsigned long long)table_addr);
adev->gart.ready = true;
return 0;
}
@@ -587,7 +592,7 @@ static int gmc_v6_0_gart_init(struct amdgpu_device *adev)
{
int r;
- if (adev->gart.robj) {
+ if (adev->gart.bo) {
dev_warn(adev->dev, "gmc_v6_0 PCIE GART already initialized\n");
return 0;
}
@@ -632,12 +637,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
amdgpu_gart_table_vram_unpin(adev);
}
-static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
u32 status, u32 addr, u32 mc_client)
{
@@ -860,11 +859,11 @@ static int gmc_v6_0_sw_init(void *handle)
adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
if (r)
return r;
@@ -935,8 +934,9 @@ static int gmc_v6_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- gmc_v6_0_gart_fini(adev);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
release_firmware(adev->gmc.fw);
adev->gmc.fw = NULL;
@@ -1180,8 +1180,7 @@ static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gmc.gmc_funcs == NULL)
- adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs;
+ adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs;
}
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 7147bfe25a23..910c4ce19cb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -28,6 +28,8 @@
#include "cik.h"
#include "gmc_v7_0.h"
#include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
#include "bif/bif_4_1_d.h"
#include "bif/bif_4_1_sh_mask.h"
@@ -43,12 +45,14 @@
#include "amdgpu_atombios.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v7_0_wait_for_idle(void *handle);
-MODULE_FIRMWARE("radeon/bonaire_mc.bin");
-MODULE_FIRMWARE("radeon/hawaii_mc.bin");
+MODULE_FIRMWARE("amdgpu/bonaire_mc.bin");
+MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");
MODULE_FIRMWARE("amdgpu/topaz_mc.bin");
static const u32 golden_settings_iceland_a11[] =
@@ -147,10 +151,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- if (adev->asic_type == CHIP_TOPAZ)
- snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- else
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
if (err)
@@ -241,8 +242,8 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
- amdgpu_device_vram_location(adev, &adev->gmc, base);
- amdgpu_device_gart_location(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc);
}
/**
@@ -601,16 +602,20 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
*/
static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
{
+ uint64_t table_addr;
int r, i;
u32 tmp, field;
- if (adev->gart.robj == NULL) {
+ if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
r = amdgpu_gart_table_vram_pin(adev);
if (r)
return r;
+
+ table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+
/* Setup TLB control */
tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
@@ -642,7 +647,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0);
@@ -666,10 +671,10 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
for (i = 1; i < 16; i++) {
if (i < 8)
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
- adev->gart.table_addr >> 12);
+ table_addr >> 12);
else
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
- adev->gart.table_addr >> 12);
+ table_addr >> 12);
}
/* enable context1-15 */
@@ -696,7 +701,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
gmc_v7_0_flush_gpu_tlb(adev, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
- (unsigned long long)adev->gart.table_addr);
+ (unsigned long long)table_addr);
adev->gart.ready = true;
return 0;
}
@@ -705,7 +710,7 @@ static int gmc_v7_0_gart_init(struct amdgpu_device *adev)
{
int r;
- if (adev->gart.robj) {
+ if (adev->gart.bo) {
WARN(1, "R600 PCIE GART already initialized\n");
return 0;
}
@@ -747,19 +752,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
}
/**
- * gmc_v7_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
-/**
* gmc_v7_0_vm_decode_fault - print human readable fault info
*
* @adev: amdgpu_device pointer
@@ -999,11 +991,11 @@ static int gmc_v7_0_sw_init(void *handle)
adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
@@ -1079,6 +1071,12 @@ static int gmc_v7_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0;
}
+ adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+ GFP_KERNEL);
+ if (!adev->gmc.vm_fault_info)
+ return -ENOMEM;
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
return 0;
}
@@ -1088,8 +1086,10 @@ static int gmc_v7_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- gmc_v7_0_gart_fini(adev);
+ kfree(adev->gmc.vm_fault_info);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
release_firmware(adev->gmc.fw);
adev->gmc.fw = NULL;
@@ -1277,7 +1277,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- u32 addr, status, mc_client;
+ u32 addr, status, mc_client, vmid;
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
@@ -1302,6 +1302,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid);
}
+ vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ VMID);
+ if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+ && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+ u32 protections = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+
+ info->vmid = vmid;
+ info->mc_id = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+ info->status = status;
+ info->page_addr = addr;
+ info->prot_valid = protections & 0x7 ? true : false;
+ info->prot_read = protections & 0x8 ? true : false;
+ info->prot_write = protections & 0x10 ? true : false;
+ info->prot_exec = protections & 0x20 ? true : false;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ }
+
return 0;
}
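The new KFD bookkeeping snapshots each fault into kfd_vm_fault_info, splitting the PROTECTIONS field into booleans exactly as above (any of the low three bits marks a validity violation, bit 3 a read, bit 4 a write, bit 5 an execute violation). A stand-alone decoder of the same bits:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* mirrors the fields filled in for struct kfd_vm_fault_info */
    struct fault_info {
        uint32_t vmid;
        uint64_t page_addr;
        bool prot_valid, prot_read, prot_write, prot_exec;
    };

    static void decode_fault(uint64_t addr, uint32_t vmid,
                             uint32_t protections, struct fault_info *info)
    {
        info->vmid = vmid;
        info->page_addr = addr;
        info->prot_valid = (protections & 0x7) != 0;   /* translation/validity */
        info->prot_read  = (protections & 0x8) != 0;
        info->prot_write = (protections & 0x10) != 0;
        info->prot_exec  = (protections & 0x20) != 0;
    }

    int main(void)
    {
        struct fault_info info = {0};

        decode_fault(0x12345000, 8, 0x10 | 0x4, &info);
        printf("vmid %u at 0x%llx: valid=%d read=%d write=%d exec=%d\n",
               info.vmid, (unsigned long long)info.page_addr,
               info.prot_valid, info.prot_read, info.prot_write, info.prot_exec);
        return 0;
    }

The mb() before vm_fault_info_updated is what guarantees KFD never sees the flag set before the snapshot itself is visible; gmc_v8_0 below repeats the same pattern.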
@@ -1365,8 +1388,7 @@ static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gmc.gmc_funcs == NULL)
- adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs;
+ adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs;
}
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1edbe6b477b5..1d3265c97b70 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -26,6 +26,8 @@
#include "amdgpu.h"
#include "gmc_v8_0.h"
#include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
@@ -44,6 +46,7 @@
#include "amdgpu_atombios.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -408,8 +411,8 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
- amdgpu_device_vram_location(adev, &adev->gmc, base);
- amdgpu_device_gart_location(adev, mc);
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc);
}
/**
@@ -804,16 +807,20 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
*/
static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
{
+ uint64_t table_addr;
int r, i;
u32 tmp, field;
- if (adev->gart.robj == NULL) {
+ if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
r = amdgpu_gart_table_vram_pin(adev);
if (r)
return r;
+
+ table_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+
/* Setup TLB control */
tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
@@ -861,7 +868,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
/* setup context0 */
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
(u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0);
@@ -885,10 +892,10 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
for (i = 1; i < 16; i++) {
if (i < 8)
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i,
- adev->gart.table_addr >> 12);
+ table_addr >> 12);
else
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8,
- adev->gart.table_addr >> 12);
+ table_addr >> 12);
}
/* enable context1-15 */
@@ -916,7 +923,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
gmc_v8_0_flush_gpu_tlb(adev, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
- (unsigned long long)adev->gart.table_addr);
+ (unsigned long long)table_addr);
adev->gart.ready = true;
return 0;
}
@@ -925,7 +932,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
{
int r;
- if (adev->gart.robj) {
+ if (adev->gart.bo) {
WARN(1, "R600 PCIE GART already initialized\n");
return 0;
}
@@ -967,19 +974,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
}
/**
- * gmc_v8_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
-/**
* gmc_v8_0_vm_decode_fault - print human readable fault info
*
* @adev: amdgpu_device pointer
@@ -1101,11 +1095,11 @@ static int gmc_v8_0_sw_init(void *handle)
adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);
if (r)
return r;
@@ -1181,6 +1175,12 @@ static int gmc_v8_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0;
}
+ adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+ GFP_KERNEL);
+ if (!adev->gmc.vm_fault_info)
+ return -ENOMEM;
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
return 0;
}
@@ -1190,8 +1190,10 @@ static int gmc_v8_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- gmc_v8_0_gart_fini(adev);
+ kfree(adev->gmc.vm_fault_info);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
release_firmware(adev->gmc.fw);
adev->gmc.fw = NULL;
@@ -1425,7 +1427,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- u32 addr, status, mc_client;
+ u32 addr, status, mc_client, vmid;
if (amdgpu_sriov_vf(adev)) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
@@ -1447,8 +1449,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
- dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
- entry->src_id, entry->src_data[0]);
+ struct amdgpu_task_info task_info = { 0 };
+
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
+ dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
+ entry->src_id, entry->src_data[0], task_info.process_name,
+ task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
@@ -1457,6 +1464,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid);
}
+ vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ VMID);
+ if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+ && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+ u32 protections = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+
+ info->vmid = vmid;
+ info->mc_id = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+ info->status = status;
+ info->page_addr = addr;
+ info->prot_valid = protections & 0x7 ? true : false;
+ info->prot_read = protections & 0x8 ? true : false;
+ info->prot_write = protections & 0x10 ? true : false;
+ info->prot_exec = protections & 0x20 ? true : false;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ }
+
return 0;
}
@@ -1703,8 +1733,7 @@ static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gmc.gmc_funcs == NULL)
- adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs;
+ adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs;
}
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3c0a85d4e4ab..f35d7a554ad5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h"
+#include "amdgpu_gem.h"
#include "hdp/hdp_4_0_offset.h"
#include "hdp/hdp_4_0_sh_mask.h"
@@ -42,6 +43,9 @@
#include "gfxhub_v1_0.h"
#include "mmhub_v1_0.h"
+#include "gfxhub_v1_1.h"
+
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
/* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
@@ -257,12 +261,17 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
}
if (printk_ratelimit()) {
+ struct amdgpu_task_info task_info = { 0 };
+
+ amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+
dev_err(adev->dev,
- "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
+ "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pasid);
- dev_err(adev->dev, " at page 0x%016llx from %d\n",
+ entry->pasid, task_info.process_name, task_info.tgid,
+ task_info.task_name, task_info.pid);
+ dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n",
addr, entry->client_id);
if (!amdgpu_sriov_vf(adev))
dev_err(adev->dev,
@@ -303,6 +312,48 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
return req;
}
+static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+ ref, mask);
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* don't wait anymore for IRQ context */
+ if (r < 1 && in_interrupt())
+ goto failed_kiq;
+
+ might_sleep();
+
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq;
+
+ return 0;
+
+failed_kiq:
+ pr_err("failed to invalidate tlb with kiq\n");
+ return r;
+}
+
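amdgpu_kiq_reg_write_reg_wait() submits the write-and-wait packet to the KIQ ring and then polls the fence: one quick poll, an early bail-out when called from IRQ context, then sleep-and-retry up to MAX_KIQ_REG_TRY times. The retry skeleton reduced to a stand-alone form; the constants and the fence poll are stand-ins for the real amdgpu_fence_wait_polling() path:

    #include <stdio.h>
    #include <unistd.h>

    #define MAX_KIQ_REG_WAIT             5000 /* stand-in, microseconds */
    #define MAX_KIQ_REG_BAILOUT_INTERVAL 5    /* stand-in, milliseconds */
    #define MAX_KIQ_REG_TRY              80   /* stand-in */

    /* stub: pretend the fence only signals on the third poll */
    static long fence_wait_polling(unsigned int seq, long timeout)
    {
        static int polls;

        (void)seq;
        (void)timeout;
        return ++polls >= 3 ? 1 : 0;
    }

    static long kiq_reg_write_reg_wait(unsigned int seq, int in_irq)
    {
        long r, cnt = 0;

        /* ring_alloc + emit_reg_write_reg_wait + fence_emit_polling go here */
        r = fence_wait_polling(seq, MAX_KIQ_REG_WAIT);

        if (r < 1 && in_irq)                /* don't sleep in IRQ context */
            goto failed;

        while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
            usleep(MAX_KIQ_REG_BAILOUT_INTERVAL * 1000);
            r = fence_wait_polling(seq, MAX_KIQ_REG_WAIT);
        }

        if (cnt > MAX_KIQ_REG_TRY)
            goto failed;

        return 0;

    failed:
        fprintf(stderr, "failed to invalidate tlb with kiq\n");
        return r;
    }

    int main(void)
    {
        return kiq_reg_write_reg_wait(1, 0) ? 1 : 0;
    }

gmc_v9_0_flush_gpu_tlb() uses this path first and only falls back to the MMIO busy-wait when the KIQ ring is not ready or the GPU is in reset.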
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -324,13 +375,23 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned i, j;
-
- spin_lock(&adev->gmc.invalidate_lock);
+ int r;
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmhub *hub = &adev->vmhub[i];
u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
+ if (adev->gfx.kiq.ring.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+ !adev->in_gpu_reset) {
+ r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
+ hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
+ if (!r)
+ continue;
+ }
+
+ spin_lock(&adev->gmc.invalidate_lock);
+
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
/* Busy wait for ACK.*/
@@ -341,8 +402,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
break;
cpu_relax();
}
- if (j < 100)
+ if (j < 100) {
+ spin_unlock(&adev->gmc.invalidate_lock);
continue;
+ }
/* Wait for ACK with a delay.*/
for (j = 0; j < adev->usec_timeout; j++) {
@@ -352,13 +415,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
break;
udelay(1);
}
- if (j < adev->usec_timeout)
+ if (j < adev->usec_timeout) {
+ spin_unlock(&adev->gmc.invalidate_lock);
continue;
-
+ }
+ spin_unlock(&adev->gmc.invalidate_lock);
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
-
- spin_unlock(&adev->gmc.invalidate_lock);
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
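In the MMIO fallback of gmc_v9_0_flush_gpu_tlb(), invalidate_lock is now taken and dropped per hub iteration, around the request write and the two ACK waits, instead of being held across all hubs. A stand-alone model of that control flow with the locking and register access stubbed out:

    #include <stdbool.h>
    #include <stdio.h>

    #define NUM_HUBS     2
    #define USEC_TIMEOUT 10

    static int lock_depth;
    static void lock(void)   { lock_depth++; }
    static void unlock(void) { lock_depth--; }

    /* stub: pretend the ACK bit only shows up during the delayed wait */
    static bool ack_set(int hub, int delayed)
    {
        (void)hub;
        return delayed != 0;
    }

    static void flush_gpu_tlb(unsigned int vmid)
    {
        int i, j;

        for (i = 0; i < NUM_HUBS; ++i) {
            /* the KIQ path would be tried first; fall through to MMIO here */
            lock();
            /* WREG32_NO_KIQ(vm_inv_eng0_req + eng, req) goes here */

            for (j = 0; j < 100; j++)           /* busy wait for the ACK */
                if (ack_set(i, 0))
                    break;
            if (j < 100) {
                unlock();
                continue;
            }

            for (j = 0; j < USEC_TIMEOUT; j++)  /* wait again, with a delay */
                if (ack_set(i, 1))
                    break;
            unlock();
            if (j >= USEC_TIMEOUT)
                fprintf(stderr, "Timeout waiting for VM flush ACK!\n");
        }
        (void)vmid;
    }

    int main(void)
    {
        flush_gpu_tlb(3);
        printf("lock balanced: %s\n", lock_depth == 0 ? "yes" : "no");
        return 0;
    }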
@@ -367,12 +430,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
unsigned eng = ring->vm_inv_eng;
- amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
-
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
lower_32_bits(pd_addr));
@@ -502,7 +561,7 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
uint64_t *addr, uint64_t *flags)
{
- if (!(*flags & AMDGPU_PDE_PTE))
+ if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
*addr = adev->vm_manager.vram_base_offset + *addr -
adev->gmc.vram_start;
BUG_ON(*addr & 0xFFFF00000000003FULL);
@@ -534,8 +593,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gmc.gmc_funcs == NULL)
- adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
+ adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}
static int gmc_v9_0_early_init(void *handle)
@@ -634,6 +692,29 @@ static int gmc_v9_0_ecc_available(struct amdgpu_device *adev)
return lost_sheep == 0;
}
+static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
+{
+
+ /*
+ * TODO:
+	 * Currently there is a bug where some memory client outside
+	 * of the driver writes to the first 8M of VRAM on S3 resume;
+	 * this overwrites the GART, which by default gets placed in the
+	 * first 8M, and causes VM_FAULTS once GTT is accessed.
+	 * Keep the stolen memory reservation until this is solved.
+ * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
+ */
+ switch (adev->asic_type) {
+ case CHIP_VEGA10:
+ return true;
+ case CHIP_RAVEN:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
+ default:
+ return false;
+ }
+}
+
static int gmc_v9_0_late_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -650,10 +731,8 @@ static int gmc_v9_0_late_init(void *handle)
unsigned i;
int r;
- /*
- * TODO - Uncomment once GART corruption issue is fixed.
- */
- /* amdgpu_bo_late_init(adev); */
+ if (!gmc_v9_0_keep_stolen_memory(adev))
+ amdgpu_bo_late_init(adev);
for(i = 0; i < adev->num_rings; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -691,10 +770,18 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
u64 base = 0;
if (!amdgpu_sriov_vf(adev))
base = mmhub_v1_0_get_fb_location(adev);
- amdgpu_device_vram_location(adev, &adev->gmc, base);
- amdgpu_device_gart_location(adev, mc);
+ /* add the xgmi offset of the physical node */
+ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
+ amdgpu_gmc_vram_location(adev, &adev->gmc, base);
+ amdgpu_gmc_gart_location(adev, mc);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_gmc_agp_location(adev, mc);
/* base offset of vram pages */
adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
+
+ /* XXX: add the xgmi offset of the physical node? */
+ adev->vm_manager.vram_base_offset +=
+ adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
}
/**
@@ -774,7 +861,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
{
int r;
- if (adev->gart.robj) {
+ if (adev->gart.bo) {
WARN(1, "VEGA10 PCIE GART already initialized\n");
return 0;
}
@@ -790,18 +877,16 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
{
-#if 0
u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
-#endif
unsigned size;
/*
* TODO Remove once GART corruption is resolved
* Check related code in gmc_v9_0_sw_fini
* */
- size = 9 * 1024 * 1024;
+ if (gmc_v9_0_keep_stolen_memory(adev))
+ return 9 * 1024 * 1024;
-#if 0
if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
} else {
@@ -818,6 +903,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
+ case CHIP_VEGA20:
default:
viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
@@ -830,7 +916,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
return 0;
-#endif
return size;
}
@@ -872,9 +957,9 @@ static int gmc_v9_0_sw_init(void *handle)
}
/* This interrupt is VMC page fault.*/
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
&adev->gmc.vm_fault);
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
&adev->gmc.vm_fault);
if (r)
@@ -906,6 +991,12 @@ static int gmc_v9_0_sw_init(void *handle)
}
adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
+ if (adev->asic_type == CHIP_VEGA20) {
+ r = gfxhub_v1_1_get_xgmi_info(adev);
+ if (r)
+ return r;
+ }
+
r = gmc_v9_0_mc_init(adev);
if (r)
return r;
@@ -935,39 +1026,19 @@ static int gmc_v9_0_sw_init(void *handle)
return 0;
}
-/**
- * gmc_v9_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v9_0_gart_fini(struct amdgpu_device *adev)
-{
- amdgpu_gart_table_vram_free(adev);
- amdgpu_gart_fini(adev);
-}
-
static int gmc_v9_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
- gmc_v9_0_gart_fini(adev);
- /*
- * TODO:
- * Currently there is a bug where some memory client outside
- * of the driver writes to first 8M of VRAM on S3 resume,
- * this overrides GART which by default gets placed in first 8M and
- * causes VM_FAULTS once GTT is accessed.
- * Keep the stolen memory reservation until the while this is not solved.
- * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
- */
- amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+ if (gmc_v9_0_keep_stolen_memory(adev))
+ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+ amdgpu_gart_table_vram_free(adev);
amdgpu_bo_fini(adev);
+ amdgpu_gart_fini(adev);
return 0;
}
@@ -1012,7 +1083,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
golden_settings_vega10_hdp,
ARRAY_SIZE(golden_settings_vega10_hdp));
- if (adev->gart.robj == NULL) {
+ if (adev->gart.bo == NULL) {
dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
return -EINVAL;
}
@@ -1022,7 +1093,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_RAVEN:
- mmhub_v1_0_initialize_power_gating(adev);
mmhub_v1_0_update_power_gating(adev, true);
break;
default:
@@ -1056,7 +1126,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
- (unsigned long long)adev->gart.table_addr);
+ (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
adev->gart.ready = true;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 842c4b677b4d..cf0fc61aebe6 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -255,7 +255,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
- entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
@@ -297,7 +297,7 @@ static int iceland_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
@@ -311,7 +311,7 @@ static int iceland_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
amdgpu_irq_remove_domain(adev);
return 0;
@@ -447,8 +447,7 @@ static const struct amdgpu_ih_funcs iceland_ih_funcs = {
static void iceland_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
- if (adev->irq.ih_funcs == NULL)
- adev->irq.ih_funcs = &iceland_ih_funcs;
+ adev->irq.ih_funcs = &iceland_ih_funcs;
}
const struct amdgpu_ip_block_version iceland_ih_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 7a1e77c93bf1..d0e478f43443 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -65,8 +65,6 @@ static int kv_set_thermal_temperature_range(struct amdgpu_device *adev,
int min_temp, int max_temp);
static int kv_init_fps_limits(struct amdgpu_device *adev);
-static void kv_dpm_powergate_uvd(void *handle, bool gate);
-static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate);
static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate);
static void kv_dpm_powergate_acp(struct amdgpu_device *adev, bool gate);
@@ -1354,8 +1352,6 @@ static int kv_dpm_enable(struct amdgpu_device *adev)
return ret;
}
- kv_update_current_ps(adev, adev->pm.dpm.boot_ps);
-
if (adev->irq.installed &&
amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) {
ret = kv_set_thermal_temperature_range(adev, KV_TEMP_RANGE_MIN, KV_TEMP_RANGE_MAX);
@@ -1374,6 +1370,8 @@ static int kv_dpm_enable(struct amdgpu_device *adev)
static void kv_dpm_disable(struct amdgpu_device *adev)
{
+ struct kv_power_info *pi = kv_get_pi(adev);
+
amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
amdgpu_irq_put(adev, &adev->pm.dpm.thermal.irq,
@@ -1387,8 +1385,10 @@ static void kv_dpm_disable(struct amdgpu_device *adev)
/* powerup blocks */
kv_dpm_powergate_acp(adev, false);
kv_dpm_powergate_samu(adev, false);
- kv_dpm_powergate_vce(adev, false);
- kv_dpm_powergate_uvd(adev, false);
+ if (pi->caps_vce_pg) /* power on the VCE block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
+ if (pi->caps_uvd_pg) /* power on the UVD block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_UVDPowerON);
kv_enable_smc_cac(adev, false);
kv_enable_didt(adev, false);
@@ -1551,7 +1551,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev,
int ret;
if (amdgpu_new_state->evclk > 0 && amdgpu_current_state->evclk == 0) {
- kv_dpm_powergate_vce(adev, false);
if (pi->caps_stable_p_state)
pi->vce_boot_level = table->count - 1;
else
@@ -1573,7 +1572,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev,
kv_enable_vce_dpm(adev, true);
} else if (amdgpu_new_state->evclk == 0 && amdgpu_current_state->evclk > 0) {
kv_enable_vce_dpm(adev, false);
- kv_dpm_powergate_vce(adev, true);
}
return 0;
@@ -1702,24 +1700,32 @@ static void kv_dpm_powergate_uvd(void *handle, bool gate)
}
}
-static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
+static void kv_dpm_powergate_vce(void *handle, bool gate)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct kv_power_info *pi = kv_get_pi(adev);
-
- if (pi->vce_power_gated == gate)
- return;
+ int ret;
pi->vce_power_gated = gate;
- if (!pi->caps_vce_pg)
- return;
-
- if (gate)
- amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF);
- else
- amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
+ if (gate) {
+ /* stop the VCE block */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_GATE);
+ kv_enable_vce_dpm(adev, false);
+ if (pi->caps_vce_pg) /* power off the VCE block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF);
+ } else {
+ if (pi->caps_vce_pg) /* power on the VCE block */
+ amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
+ kv_enable_vce_dpm(adev, true);
+ /* re-init the VCE block */
+ ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+ AMD_PG_STATE_UNGATE);
+ }
}
+
static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate)
{
struct kv_power_info *pi = kv_get_pi(adev);
@@ -1921,7 +1927,7 @@ static int kv_dpm_set_power_state(void *handle)
int ret;
if (pi->bapm_enable) {
- ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.dpm.ac_power);
+ ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.ac_power);
if (ret) {
DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n");
return ret;
@@ -2989,12 +2995,12 @@ static int kv_dpm_sw_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230,
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230,
&adev->pm.dpm.thermal.irq);
if (ret)
return ret;
- ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231,
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231,
&adev->pm.dpm.thermal.irq);
if (ret)
return ret;
@@ -3061,7 +3067,7 @@ static int kv_dpm_hw_init(void *handle)
else
adev->pm.dpm_enabled = true;
mutex_unlock(&adev->pm.mutex);
-
+ amdgpu_pm_compute_clocks(adev);
return ret;
}
@@ -3306,6 +3312,22 @@ static int kv_dpm_read_sensor(void *handle, int idx,
}
}
+static int kv_set_powergating_by_smu(void *handle,
+ uint32_t block_type, bool gate)
+{
+ switch (block_type) {
+ case AMD_IP_BLOCK_TYPE_UVD:
+ kv_dpm_powergate_uvd(handle, gate);
+ break;
+ case AMD_IP_BLOCK_TYPE_VCE:
+ kv_dpm_powergate_vce(handle, gate);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
static const struct amd_ip_funcs kv_dpm_ip_funcs = {
.name = "kv_dpm",
.early_init = kv_dpm_early_init,
@@ -3342,7 +3364,7 @@ static const struct amd_pm_funcs kv_dpm_funcs = {
.print_power_state = &kv_dpm_print_power_state,
.debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level,
.force_performance_level = &kv_dpm_force_performance_level,
- .powergate_uvd = &kv_dpm_powergate_uvd,
+ .set_powergating_by_smu = kv_set_powergating_by_smu,
.enable_bapm = &kv_dpm_enable_bapm,
.get_vce_clock_state = amdgpu_get_vce_clock_state,
.check_state_equal = kv_check_state_equal,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 3d53c4413f13..14649f8475f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -38,22 +38,23 @@
u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE);
+ u64 top = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP);
base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
+ top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK;
+ top <<= 24;
+
+ adev->gmc.fb_start = base;
+ adev->gmc.fb_end = top;
+
return base;
}
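mmhub_v1_0_get_fb_location() now records both ends of the FB aperture in gmc.fb_start/fb_end. Both registers carry the address in 16MB units in their low bits; a sketch of the decode, assuming a 16-bit field as in the older gmc_v6/7/8 code rather than the generated MC_VM_FB_LOCATION masks:

    #include <stdint.h>
    #include <stdio.h>

    /* assumed field width; the driver uses MC_VM_FB_LOCATION_*__FB_*_MASK */
    #define FB_FIELD_MASK 0xFFFFu

    static void fb_location(uint32_t base_reg, uint32_t top_reg,
                            uint64_t *fb_start, uint64_t *fb_end)
    {
        /* both fields are in units of 16MB (<< 24) */
        *fb_start = (uint64_t)(base_reg & FB_FIELD_MASK) << 24;
        *fb_end   = (uint64_t)(top_reg  & FB_FIELD_MASK) << 24;
    }

    int main(void)
    {
        uint64_t start, end;

        fb_location(0x0080, 0x0083, &start, &end);  /* sample readback */
        printf("fb_start=0x%llx fb_end=0x%llx\n",
               (unsigned long long)start, (unsigned long long)end);
        return 0;
    }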
static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
{
- uint64_t value;
-
- BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
- value = adev->gart.table_addr - adev->gmc.vram_start +
- adev->vm_manager.vram_base_offset;
- value &= 0x0000FFFFFFFFF000ULL;
- value |= 0x1; /* valid bit */
+ uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(value));
@@ -82,16 +83,28 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
uint64_t value;
uint32_t tmp;
- /* Disable AGP. */
+ /* Program the AGP BAR */
WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0);
- WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0);
- WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- adev->gmc.vram_start >> 18);
- WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- adev->gmc.vram_end >> 18);
+ min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18);
+
+ if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
+ /*
+	 * Raven2 has a HW issue in that it is unable to use the vram
+	 * that lies beyond MC_VM_SYSTEM_APERTURE_HIGH_ADDR. As a
+	 * workaround, increase the system aperture high address (add 1)
+	 * to avoid the VM fault and hardware hang.
+ */
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max((adev->gmc.vram_end >> 18) + 0x1,
+ adev->gmc.agp_end >> 18));
+ else
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+ max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
@@ -260,236 +273,16 @@ static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev)
}
}
-struct pctl_data {
- uint32_t index;
- uint32_t data;
-};
-
-static const struct pctl_data pctl0_data[] = {
- {0x0, 0x7a640},
- {0x9, 0x2a64a},
- {0xd, 0x2a680},
- {0x11, 0x6a684},
- {0x19, 0xea68e},
- {0x29, 0xa69e},
- {0x2b, 0x0010a6c0},
- {0x3d, 0x83a707},
- {0xc2, 0x8a7a4},
- {0xcc, 0x1a7b8},
- {0xcf, 0xfa7cc},
- {0xe0, 0x17a7dd},
- {0xf9, 0xa7dc},
- {0xfb, 0x12a7f5},
- {0x10f, 0xa808},
- {0x111, 0x12a810},
- {0x125, 0x7a82c}
-};
-#define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
-
-#define PCTL0_RENG_EXEC_END_PTR 0x12d
-#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640
-#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
-
-static const struct pctl_data pctl1_data[] = {
- {0x0, 0x39a000},
- {0x3b, 0x44a040},
- {0x81, 0x2a08d},
- {0x85, 0x6ba094},
- {0xf2, 0x18a100},
- {0x10c, 0x4a132},
- {0x112, 0xca141},
- {0x120, 0x2fa158},
- {0x151, 0x17a1d0},
- {0x16a, 0x1a1e9},
- {0x16d, 0x13a1ec},
- {0x182, 0x7a201},
- {0x18b, 0x3a20a},
- {0x190, 0x7a580},
- {0x199, 0xa590},
- {0x19b, 0x4a594},
- {0x1a1, 0x1a59c},
- {0x1a4, 0x7a82c},
- {0x1ad, 0xfa7cc},
- {0x1be, 0x17a7dd},
- {0x1d7, 0x12a810},
- {0x1eb, 0x4000a7e1},
- {0x1ec, 0x5000a7f5},
- {0x1ed, 0x4000a7e2},
- {0x1ee, 0x5000a7dc},
- {0x1ef, 0x4000a7e3},
- {0x1f0, 0x5000a7f6},
- {0x1f1, 0x5000a7e4}
-};
-#define PCTL1_DATA_LEN (ARRAY_SIZE(pctl1_data))
-
-#define PCTL1_RENG_EXEC_END_PTR 0x1f1
-#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000
-#define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d
-#define PCTL1_STCTRL_REG_SAVE_RANGE1_BASE 0xa580
-#define PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT 0xa59d
-#define PCTL1_STCTRL_REG_SAVE_RANGE2_BASE 0xa82c
-#define PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT 0xa833
-
-static void mmhub_v1_0_power_gating_write_save_ranges(struct amdgpu_device *adev)
-{
- uint32_t tmp = 0;
-
- /* PCTL0_STCTRL_REGISTER_SAVE_RANGE0 */
- tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
- STCTRL_REGISTER_SAVE_BASE,
- PCTL0_STCTRL_REG_SAVE_RANGE0_BASE);
- tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0,
- STCTRL_REGISTER_SAVE_LIMIT,
- PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT);
- WREG32_SOC15(MMHUB, 0, mmPCTL0_STCTRL_REGISTER_SAVE_RANGE0, tmp);
-
- /* PCTL1_STCTRL_REGISTER_SAVE_RANGE0 */
- tmp = 0;
- tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
- STCTRL_REGISTER_SAVE_BASE,
- PCTL1_STCTRL_REG_SAVE_RANGE0_BASE);
- tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0,
- STCTRL_REGISTER_SAVE_LIMIT,
- PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT);
- WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE0, tmp);
-
- /* PCTL1_STCTRL_REGISTER_SAVE_RANGE1 */
- tmp = 0;
- tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
- STCTRL_REGISTER_SAVE_BASE,
- PCTL1_STCTRL_REG_SAVE_RANGE1_BASE);
- tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1,
- STCTRL_REGISTER_SAVE_LIMIT,
- PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT);
- WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE1, tmp);
-
- /* PCTL1_STCTRL_REGISTER_SAVE_RANGE2 */
- tmp = 0;
- tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
- STCTRL_REGISTER_SAVE_BASE,
- PCTL1_STCTRL_REG_SAVE_RANGE2_BASE);
- tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2,
- STCTRL_REGISTER_SAVE_LIMIT,
- PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT);
- WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE2, tmp);
-}
-
-void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
-{
- uint32_t pctl0_misc = 0;
- uint32_t pctl0_reng_execute = 0;
- uint32_t pctl1_misc = 0;
- uint32_t pctl1_reng_execute = 0;
- int i = 0;
-
- if (amdgpu_sriov_vf(adev))
- return;
-
- /****************** pctl0 **********************/
- pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
- pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
-
- /* Light sleep must be disabled before writing to pctl0 registers */
- pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
- WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
-
- /* Write data used to access ram of register engine */
- for (i = 0; i < PCTL0_DATA_LEN; i++) {
- WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_INDEX,
- pctl0_data[i].index);
- WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_DATA,
- pctl0_data[i].data);
- }
-
- /* Re-enable light sleep */
- pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
- WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
-
- /****************** pctl1 **********************/
- pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
- pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
-
- /* Light sleep must be disabled before writing to pctl1 registers */
- pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
- WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
-
- /* Write data used to access ram of register engine */
- for (i = 0; i < PCTL1_DATA_LEN; i++) {
- WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_INDEX,
- pctl1_data[i].index);
- WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_DATA,
- pctl1_data[i].data);
- }
-
- /* Re-enable light sleep */
- pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
- WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
-
- mmhub_v1_0_power_gating_write_save_ranges(adev);
-
- /* Set the reng execute end ptr for pctl0 */
- pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
- PCTL0_RENG_EXECUTE,
- RENG_EXECUTE_END_PTR,
- PCTL0_RENG_EXEC_END_PTR);
- WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
-
- /* Set the reng execute end ptr for pctl1 */
- pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
- PCTL1_RENG_EXECUTE,
- RENG_EXECUTE_END_PTR,
- PCTL1_RENG_EXEC_END_PTR);
- WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
-}
-
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
bool enable)
{
- uint32_t pctl0_reng_execute = 0;
- uint32_t pctl1_reng_execute = 0;
-
if (amdgpu_sriov_vf(adev))
return;
- pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
- pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
-
if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
- pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
- PCTL0_RENG_EXECUTE,
- RENG_EXECUTE_ON_PWR_UP, 1);
- pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
- PCTL0_RENG_EXECUTE,
- RENG_EXECUTE_ON_REG_UPDATE, 1);
- WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
-
- pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
- PCTL1_RENG_EXECUTE,
- RENG_EXECUTE_ON_PWR_UP, 1);
- pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
- PCTL1_RENG_EXECUTE,
- RENG_EXECUTE_ON_REG_UPDATE, 1);
- WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
-
- if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu)
- amdgpu_dpm_set_mmhub_powergating_by_smu(adev);
+ if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);
- } else {
- pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
- PCTL0_RENG_EXECUTE,
- RENG_EXECUTE_ON_PWR_UP, 0);
- pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
- PCTL0_RENG_EXECUTE,
- RENG_EXECUTE_ON_REG_UPDATE, 0);
- WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
-
- pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
- PCTL1_RENG_EXECUTE,
- RENG_EXECUTE_ON_PWR_UP, 0);
- pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
- PCTL1_RENG_EXECUTE,
- RENG_EXECUTE_ON_REG_UPDATE, 0);
- WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
index 5d38229baf69..bef3d0c0c117 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h
@@ -32,7 +32,6 @@ void mmhub_v1_0_init(struct amdgpu_device *adev);
int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
-void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev);
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
bool enable);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 078f70faedcb..8cbb4655896a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -266,8 +266,8 @@ flr_done:
}
/* Trigger recovery for world switch failure if no TDR */
- if (amdgpu_lockup_timeout == 0)
- amdgpu_device_gpu_recover(adev, NULL, true);
+ if (amdgpu_device_should_recover_gpu(adev))
+ amdgpu_device_gpu_recover(adev, NULL);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 9fc1c37344ce..64e875d528dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -521,7 +521,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
- amdgpu_device_gpu_recover(adev, NULL, false);
+ if (amdgpu_device_should_recover_gpu(adev))
+ amdgpu_device_gpu_recover(adev, NULL);
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -579,11 +580,11 @@ int xgpu_vi_mailbox_add_irq_id(struct amdgpu_device *adev)
{
int r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
if (r) {
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 365517c0121e..df34dc79d444 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -34,19 +34,10 @@
#define smnCPM_CONTROL 0x11180460
#define smnPCIE_CNTL2 0x11180070
-/* vega20 */
-#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011
-#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2
-
static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
- if (adev->asic_type == CHIP_VEGA20)
- tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
- else
- tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
-
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
@@ -84,14 +75,10 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan
SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
u32 doorbell_range = RREG32(reg);
- u32 range = 2;
-
- if (adev->asic_type == CHIP_VEGA20)
- range = 8;
if (use_doorbell) {
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
- doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range);
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
} else
doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
@@ -146,9 +133,6 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
{
uint32_t def, data;
- if (adev->asic_type == CHIP_VEGA20)
- return;
-
/* NBIF_MGCG_CTRL_LCLK */
def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
new file mode 100644
index 000000000000..f8cee95d61cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "nbio_v7_4.h"
+
+#include "nbio/nbio_7_4_offset.h"
+#include "nbio/nbio_7_4_sh_mask.h"
+
+#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
+
+#define smnCPM_CONTROL 0x11180460
+#define smnPCIE_CNTL2 0x11180070
+
+static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
+{
+ u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
+ tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
+ tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
+
+ return tmp;
+}
+
+static void nbio_v7_4_mc_access_enable(struct amdgpu_device *adev, bool enable)
+{
+ if (enable)
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN,
+ BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
+ else
+ WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
+}
+
+static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+}
+
+static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
+{
+ return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE);
+}
+
+static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
+ SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+
+ u32 doorbell_range = RREG32(reg);
+
+ if (use_doorbell) {
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
+ } else
+ doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32(reg, doorbell_range);
+}
+
+static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0);
+}
+
+static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+
+}
+
+static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev,
+ bool use_doorbell, int doorbell_index)
+{
+ u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE);
+
+ if (use_doorbell) {
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index);
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2);
+ } else
+ ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0);
+
+ WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range);
+}
+
+
+static void nbio_v7_4_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ //TODO: Add support for v7.4
+}
+
+static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev,
+ bool enable)
+{
+ uint32_t def, data;
+
+ def = data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+ data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ } else {
+ data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
+ PCIE_CNTL2__MST_MEM_LS_EN_MASK |
+ PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
+ }
+
+ if (def != data)
+ WREG32_PCIE(smnPCIE_CNTL2, data);
+}
+
+static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev,
+ u32 *flags)
+{
+ int data;
+
+ /* AMD_CG_SUPPORT_BIF_MGCG */
+ data = RREG32_PCIE(smnCPM_CONTROL);
+ if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_MGCG;
+
+ /* AMD_CG_SUPPORT_BIF_LS */
+ data = RREG32_PCIE(smnPCIE_CNTL2);
+ if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK)
+ *flags |= AMD_CG_SUPPORT_BIF_LS;
+}
+
+static void nbio_v7_4_ih_control(struct amdgpu_device *adev)
+{
+ u32 interrupt_cntl;
+
+ /* setup interrupt control */
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
+ interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
+ /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
+ * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
+ */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0);
+ /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */
+ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0);
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl);
+}
+
+static u32 nbio_v7_4_get_hdp_flush_req_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ);
+}
+
+static u32 nbio_v7_4_get_hdp_flush_done_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE);
+}
+
+static u32 nbio_v7_4_get_pcie_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
+}
+
+static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
+}
+
+static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
+ .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
+ .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
+ .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
+ .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
+ .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
+ .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
+ .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
+ .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
+ .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
+ .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
+ .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK,
+ .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
+};
+
+static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
+{
+ uint32_t reg;
+
+ reg = RREG32_SOC15(NBIO, 0, mmRCC_IOV_FUNC_IDENTIFIER);
+ if (reg & 1)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
+
+ if (reg & 0x80000000)
+ adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
+
+ if (!reg) {
+ if (is_virtual_machine()) /* passthrough mode excludes sriov mode */
+ adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
+ }
+}
+
+static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
+{
+
+}
+
+const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
+ .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg,
+ .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset,
+ .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset,
+ .get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset,
+ .get_pcie_data_offset = nbio_v7_4_get_pcie_data_offset,
+ .get_rev_id = nbio_v7_4_get_rev_id,
+ .mc_access_enable = nbio_v7_4_mc_access_enable,
+ .hdp_flush = nbio_v7_4_hdp_flush,
+ .get_memsize = nbio_v7_4_get_memsize,
+ .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range,
+ .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture,
+ .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture,
+ .ih_doorbell_range = nbio_v7_4_ih_doorbell_range,
+ .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating,
+ .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep,
+ .get_clockgating_state = nbio_v7_4_get_clockgating_state,
+ .ih_control = nbio_v7_4_ih_control,
+ .init_registers = nbio_v7_4_init_registers,
+ .detect_hw_virt = nbio_v7_4_detect_hw_virt,
+};
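
The doorbell-range helpers in this new file rely on the driver's REG_SET_FIELD() read-modify-write pattern. A minimal standalone sketch of that mask-and-shift arithmetic, using hypothetical mask/shift values rather than the real layout from the *_sh_mask.h headers:

#include <stdint.h>

/* Hypothetical 3-bit field located at bit 2; illustration only. */
#define EXAMPLE_FIELD_MASK	0x0000001CU
#define EXAMPLE_FIELD__SHIFT	2

static uint32_t example_set_field(uint32_t reg_val, uint32_t field_val)
{
	reg_val &= ~EXAMPLE_FIELD_MASK;                     /* clear the old field  */
	reg_val |= (field_val << EXAMPLE_FIELD__SHIFT)
		   & EXAMPLE_FIELD_MASK;                    /* insert the new field */
	return reg_val;                                     /* then write back      */
}
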
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
new file mode 100644
index 000000000000..c442865bac4f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NBIO_V7_4_H__
+#define __NBIO_V7_4_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 0cf48d26c676..882bd83a28c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -189,7 +189,8 @@ enum psp_gfx_fw_type
GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20,
GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21,
GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22,
- GFX_FW_TYPE_MAX = 23
+ GFX_FW_TYPE_UVD1 = 23,
+ GFX_FW_TYPE_MAX = 24
};
/* Command to load HW IP FW. */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 0ff136d02d9b..295c2205485a 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -35,6 +35,8 @@
#include "sdma0/sdma0_4_1_offset.h"
MODULE_FIRMWARE("amdgpu/raven_asd.bin");
+MODULE_FIRMWARE("amdgpu/picasso_asd.bin");
+MODULE_FIRMWARE("amdgpu/raven2_asd.bin");
static int
psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
@@ -88,6 +90,15 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
case AMDGPU_UCODE_ID_VCE:
*type = GFX_FW_TYPE_VCE;
break;
+ case AMDGPU_UCODE_ID_VCN:
+ *type = GFX_FW_TYPE_VCN;
+ break;
+ case AMDGPU_UCODE_ID_DMCU_ERAM:
+ *type = GFX_FW_TYPE_DMCU_ERAM;
+ break;
+ case AMDGPU_UCODE_ID_DMCU_INTV:
+ *type = GFX_FW_TYPE_DMCU_ISR;
+ break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
@@ -108,7 +119,12 @@ static int psp_v10_0_init_microcode(struct psp_context *psp)
switch (adev->asic_type) {
case CHIP_RAVEN:
- chip_name = "raven";
+ if (adev->rev_id >= 0x8)
+ chip_name = "raven2";
+ else if (adev->pdev->device == 0x15d8)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
break;
default: BUG();
}
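
The Raven branch above now picks between three firmware name prefixes. A small standalone sketch of that selection, using the revision and PCI device checks from the patch (the helper name and parameters are illustrative only):

static const char *raven_chip_name(unsigned int rev_id, unsigned int pci_device)
{
	if (rev_id >= 0x8)
		return "raven2";        /* Raven2 parts report rev_id >= 0x8  */
	if (pci_device == 0x15d8)
		return "picasso";       /* Picasso shares the Raven code path */
	return "raven";
}

/* Usage, mirroring init_microcode():
 *   snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin",
 *            raven_chip_name(rev_id, pci_device));
 */
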
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
new file mode 100644
index 000000000000..3f3fac2d50cd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -0,0 +1,595 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/firmware.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v11_0.h"
+
+#include "mp/mp_11_0_offset.h"
+#include "mp/mp_11_0_sh_mask.h"
+#include "gc/gc_9_0_offset.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "nbio/nbio_7_4_offset.h"
+
+MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
+
+/* address block */
+#define smnMP1_FIRMWARE_FLAGS 0x3010024
+
+static int
+psp_v11_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type)
+{
+ switch (ucode->ucode_id) {
+ case AMDGPU_UCODE_ID_SDMA0:
+ *type = GFX_FW_TYPE_SDMA0;
+ break;
+ case AMDGPU_UCODE_ID_SDMA1:
+ *type = GFX_FW_TYPE_SDMA1;
+ break;
+ case AMDGPU_UCODE_ID_CP_CE:
+ *type = GFX_FW_TYPE_CP_CE;
+ break;
+ case AMDGPU_UCODE_ID_CP_PFP:
+ *type = GFX_FW_TYPE_CP_PFP;
+ break;
+ case AMDGPU_UCODE_ID_CP_ME:
+ *type = GFX_FW_TYPE_CP_ME;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ *type = GFX_FW_TYPE_CP_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC1_JT:
+ *type = GFX_FW_TYPE_CP_MEC_ME1;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ *type = GFX_FW_TYPE_CP_MEC;
+ break;
+ case AMDGPU_UCODE_ID_CP_MEC2_JT:
+ *type = GFX_FW_TYPE_CP_MEC_ME2;
+ break;
+ case AMDGPU_UCODE_ID_RLC_G:
+ *type = GFX_FW_TYPE_RLC_G;
+ break;
+ case AMDGPU_UCODE_ID_SMC:
+ *type = GFX_FW_TYPE_SMU;
+ break;
+ case AMDGPU_UCODE_ID_UVD:
+ *type = GFX_FW_TYPE_UVD;
+ break;
+ case AMDGPU_UCODE_ID_VCE:
+ *type = GFX_FW_TYPE_VCE;
+ break;
+ case AMDGPU_UCODE_ID_UVD1:
+ *type = GFX_FW_TYPE_UVD1;
+ break;
+ case AMDGPU_UCODE_ID_MAXIMUM:
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int psp_v11_0_init_microcode(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ const char *chip_name;
+ char fw_name[30];
+ int err = 0;
+ const struct psp_firmware_header_v1_0 *hdr;
+
+ DRM_DEBUG("\n");
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ chip_name = "vega20";
+ break;
+ default:
+ BUG();
+ }
+
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name);
+ err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev);
+ if (err)
+ goto out;
+
+ err = amdgpu_ucode_validate(adev->psp.sos_fw);
+ if (err)
+ goto out;
+
+ hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data;
+ adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes);
+ adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) -
+ le32_to_cpu(hdr->sos_size_bytes);
+ adev->psp.sys_start_addr = (uint8_t *)hdr +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes);
+ adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
+ le32_to_cpu(hdr->sos_offset_bytes);
+ return 0;
+out:
+ if (err) {
+ dev_err(adev->dev,
+ "psp v11.0: Failed to load firmware \"%s\"\n",
+ fw_name);
+ release_firmware(adev->psp.sos_fw);
+ adev->psp.sos_fw = NULL;
+ }
+
+ return err;
+}
+
+static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+ int ret;
+ uint32_t psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * have already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return 0;
+
+ /* Wait for the bootloader to signal that it is ready by setting bit 31 of C2PMSG_35 to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy PSP System Driver binary to memory */
+ memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
+
+ /* Provide the sys driver to bootrom */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = 1 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be a handshake issue with the hardware, which needs a delay */
+ mdelay(20);
+
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
+{
+ int ret;
+ unsigned int psp_gfxdrv_command_reg = 0;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t sol_reg;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * have already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return 0;
+
+ /* Wait for the bootloader to signal that it is ready by setting bit 31 of C2PMSG_35 to 1 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ 0x80000000, 0x80000000, false);
+ if (ret)
+ return ret;
+
+ memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+ /* Copy Secure OS binary to PSP memory */
+ memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
+
+ /* Provide the PSP secure OS to bootrom */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+ (uint32_t)(psp->fw_pri_mc_addr >> 20));
+ psp_gfxdrv_command_reg = 2 << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+ psp_gfxdrv_command_reg);
+
+ /* there might be a handshake issue with the hardware, which needs a delay */
+ mdelay(20);
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
+ 0, true);
+
+ return ret;
+}
+
+static int psp_v11_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd)
+{
+ int ret;
+ uint64_t fw_mem_mc_addr = ucode->mc_addr;
+
+ memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
+
+ cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
+ cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
+
+ ret = psp_v11_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
+ if (ret)
+ DRM_ERROR("Unknown firmware type\n");
+
+ return ret;
+}
+
+static int psp_v11_0_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ring = &psp->km_ring;
+
+ ring->ring_type = ring_type;
+
+ /* allocate a 4K page of local frame buffer memory for the ring */
+ ring->ring_size = 0x1000;
+ ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+ if (ret) {
+ ring->ring_size = 0;
+ return ret;
+ }
+
+ return 0;
+}
+
+static int psp_v11_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ unsigned int psp_ring_reg = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write low address of the ring to C2PMSG_69 */
+ psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+ /* Write high address of the ring to C2PMSG_70 */
+ psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+ /* Write size of ring to C2PMSG_71 */
+ psp_ring_reg = ring->ring_size;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+ /* Write the ring initialization command to C2PMSG_64 */
+ psp_ring_reg = ring_type;
+ psp_ring_reg = psp_ring_reg << 16;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+ /* there might be a handshake issue with the hardware, which needs a delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x8000FFFF, false);
+
+ return ret;
+}
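
The ring-create path above waits on C2PMSG_64 with mask 0x8000FFFF, i.e. it requires the done bit (31) to be set and the low 16 status bits to be clear, while the simpler waits only check bit 31. A standalone sketch of that masked polling, assuming psp_wait_for() follows the usual (reg & mask) == expected comparison; this is an illustration, not the driver's helper:

#include <stdbool.h>
#include <stdint.h>

/* 'read_reg' stands in for the real MMIO read; timeout handling is
 * reduced to a bounded loop for brevity. */
static bool example_wait_masked(uint32_t (*read_reg)(void),
				uint32_t expected, uint32_t mask)
{
	for (int i = 0; i < 100000; i++) {
		if ((read_reg() & mask) == expected)
			return true;
	}
	return false;
}

/* mask 0x80000000, expected 0x80000000: only bit 31 (done) is checked.
 * mask 0x8000FFFF, expected 0x80000000: bit 31 set and status[15:0] == 0. */
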
+
+static int psp_v11_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct amdgpu_device *adev = psp->adev;
+
+ /* Write the ring destroy command to C2PMSG_64 */
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS);
+
+ /* there might be a handshake issue with the hardware, which needs a delay */
+ mdelay(20);
+
+ /* Wait for response flag (bit 31) in C2PMSG_64 */
+ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+ 0x80000000, 0x80000000, false);
+
+ return ret;
+}
+
+static int psp_v11_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
+{
+ int ret = 0;
+ struct psp_ring *ring = &psp->km_ring;
+ struct amdgpu_device *adev = psp->adev;
+
+ ret = psp_v11_0_ring_stop(psp, ring_type);
+ if (ret)
+ DRM_ERROR("Fail to stop psp ring\n");
+
+ amdgpu_bo_free_kernel(&adev->firmware.rbuf,
+ &ring->ring_mem_mc_addr,
+ (void **)&ring->ring_mem);
+
+ return ret;
+}
+
+static int psp_v11_0_cmd_submit(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
+ int index)
+{
+ unsigned int psp_write_ptr_reg = 0;
+ struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem;
+ struct psp_ring *ring = &psp->km_ring;
+ struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem;
+ struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start +
+ ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1;
+ struct amdgpu_device *adev = psp->adev;
+ uint32_t ring_size_dw = ring->ring_size / 4;
+ uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
+
+ /* KM (GPCOM) prepare write pointer */
+ psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+
+ /* Update KM RB frame pointer to new frame */
+ /* write_frame ptr increments by size of rb_frame in bytes */
+ /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */
+ if ((psp_write_ptr_reg % ring_size_dw) == 0)
+ write_frame = ring_buffer_start;
+ else
+ write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw);
+ /* Check invalid write_frame ptr address */
+ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) {
+ DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n",
+ ring_buffer_start, ring_buffer_end, write_frame);
+ DRM_ERROR("write_frame is pointing to address out of bounds\n");
+ return -EINVAL;
+ }
+
+ /* Initialize KM RB frame */
+ memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame));
+
+ /* Update KM RB frame */
+ write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr);
+ write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr);
+ write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr);
+ write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr);
+ write_frame->fence_value = index;
+
+ /* Update the write Pointer in DWORDs */
+ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
+
+ return 0;
+}
+
+static int
+psp_v11_0_sram_map(struct amdgpu_device *adev,
+ unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
+ unsigned int *sram_data_reg_offset,
+ enum AMDGPU_UCODE_ID ucode_id)
+{
+ int ret = 0;
+
+ switch (ucode_id) {
+/* TODO: needs to be confirmed */
+#if 0
+ case AMDGPU_UCODE_ID_SMC:
+ *sram_offset = 0;
+ *sram_addr_reg_offset = 0;
+ *sram_data_reg_offset = 0;
+ break;
+#endif
+
+ case AMDGPU_UCODE_ID_CP_CE:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_PFP:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_ME:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_MEC1:
+ *sram_offset = 0x10000;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_CP_MEC2:
+ *sram_offset = 0x10000;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_RLC_G:
+ *sram_offset = 0x2000;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA);
+ break;
+
+ case AMDGPU_UCODE_ID_SDMA0:
+ *sram_offset = 0x0;
+ *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR);
+ *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA);
+ break;
+
+/* TODO: needs to be confirmed */
+#if 0
+ case AMDGPU_UCODE_ID_SDMA1:
+ *sram_offset = ;
+ *sram_addr_reg_offset = ;
+ break;
+
+ case AMDGPU_UCODE_ID_UVD:
+ *sram_offset = ;
+ *sram_addr_reg_offset = ;
+ break;
+
+ case AMDGPU_UCODE_ID_VCE:
+ *sram_offset = ;
+ *sram_addr_reg_offset = ;
+ break;
+#endif
+
+ case AMDGPU_UCODE_ID_MAXIMUM:
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static bool psp_v11_0_compare_sram_data(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ enum AMDGPU_UCODE_ID ucode_type)
+{
+ int err = 0;
+ unsigned int fw_sram_reg_val = 0;
+ unsigned int fw_sram_addr_reg_offset = 0;
+ unsigned int fw_sram_data_reg_offset = 0;
+ unsigned int ucode_size;
+ uint32_t *ucode_mem = NULL;
+ struct amdgpu_device *adev = psp->adev;
+
+ err = psp_v11_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
+ &fw_sram_data_reg_offset, ucode_type);
+ if (err)
+ return false;
+
+ WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
+
+ ucode_size = ucode->ucode_size;
+ ucode_mem = (uint32_t *)ucode->kaddr;
+ while (ucode_size) {
+ fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
+
+ if (*ucode_mem != fw_sram_reg_val)
+ return false;
+
+ ucode_mem++;
+ /* 4 bytes */
+ ucode_size -= 4;
+ }
+
+ return true;
+}
+
+static int psp_v11_0_mode1_reset(struct psp_context *psp)
+{
+ int ret;
+ uint32_t offset;
+ struct amdgpu_device *adev = psp->adev;
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+
+ if (ret) {
+ DRM_INFO("psp is not working correctly before mode1 reset!\n");
+ return -EINVAL;
+ }
+
+ /*send the mode 1 reset command*/
+ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
+
+ mdelay(1000);
+
+ offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
+
+ ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+
+ if (ret) {
+ DRM_INFO("psp mode 1 reset failed!\n");
+ return -EINVAL;
+ }
+
+ DRM_INFO("psp mode1 reset succeed \n");
+
+ return 0;
+}
+
+/* TODO: Fill in the following functions once the PSP firmware interface for XGMI is ready.
+ * For now, return success and hack the hive_id so higher-level code can
+ * start testing.
+ */
+static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp,
+ int number_devices, struct psp_xgmi_topology_info *topology)
+{
+ return 0;
+}
+
+static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
+ int number_devices, struct psp_xgmi_topology_info *topology)
+{
+ return 0;
+}
+
+static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
+{
+ u64 hive_id = 0;
+
+ /* Remove me when we can get correct hive_id through PSP */
+ if (psp->adev->gmc.xgmi.num_physical_nodes)
+ hive_id = 0x123456789abcdef;
+
+ return hive_id;
+}
+
+static const struct psp_funcs psp_v11_0_funcs = {
+ .init_microcode = psp_v11_0_init_microcode,
+ .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
+ .bootloader_load_sos = psp_v11_0_bootloader_load_sos,
+ .prep_cmd_buf = psp_v11_0_prep_cmd_buf,
+ .ring_init = psp_v11_0_ring_init,
+ .ring_create = psp_v11_0_ring_create,
+ .ring_stop = psp_v11_0_ring_stop,
+ .ring_destroy = psp_v11_0_ring_destroy,
+ .cmd_submit = psp_v11_0_cmd_submit,
+ .compare_sram_data = psp_v11_0_compare_sram_data,
+ .mode1_reset = psp_v11_0_mode1_reset,
+ .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info,
+ .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
+ .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
+};
+
+void psp_v11_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v11_0_funcs;
+}
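
The command-submission path in psp_v11_0_cmd_submit() above keeps the write pointer in DWORD units and wraps it at the ring size before translating it into a frame slot. A standalone sketch of that index arithmetic, assuming the pointer always stays below the ring size (illustration only, not the driver's code):

#include <stdint.h>

/* Advance a DWORD write pointer by one ring-buffer frame, wrapping at the
 * ring size. */
static uint32_t example_advance_wptr(uint32_t wptr_dw,
				     uint32_t frame_size_dw,
				     uint32_t ring_size_dw)
{
	return (wptr_dw + frame_size_dw) % ring_size_dw;
}

/* Map the DWORD pointer onto a frame index; 0 maps to the ring start. */
static uint32_t example_frame_slot(uint32_t wptr_dw, uint32_t frame_size_dw,
				   uint32_t ring_size_dw)
{
	return (wptr_dw % ring_size_dw) / frame_size_dw;
}
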
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h
new file mode 100644
index 000000000000..082c16c887bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __PSP_V11_0_H__
+#define __PSP_V11_0_H__
+
+#include "amdgpu_psp.h"
+
+void psp_v11_0_set_psp_funcs(struct psp_context *psp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 727071fee6f6..e1ebf770c303 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -41,8 +41,6 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
-MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
-MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
#define smnMP1_FIRMWARE_FLAGS 0x3010028
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index c7190c39c4f5..2d4770e173dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -44,6 +44,8 @@
#include "iceland_sdma_pkt_open.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -202,8 +204,7 @@ static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
- u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
+ u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
return wptr;
}
@@ -218,9 +219,8 @@ static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
}
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -273,7 +273,7 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
u32 ref_and_mask = 0;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
else
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
@@ -504,41 +504,6 @@ static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev)
return 0;
}
-/**
- * sdma_v2_4_load_microcode - load the sDMA ME ucode
- *
- * @adev: amdgpu_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int sdma_v2_4_load_microcode(struct amdgpu_device *adev)
-{
- const struct sdma_firmware_header_v1_0 *hdr;
- const __le32 *fw_data;
- u32 fw_size;
- int i, j;
-
- /* halt the MEs */
- sdma_v2_4_enable(adev, false);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (!adev->sdma.instance[i].fw)
- return -EINVAL;
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
- amdgpu_ucode_print_sdma_hdr(&hdr->header);
- fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
- fw_data = (const __le32 *)
- (adev->sdma.instance[i].fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
- for (j = 0; j < fw_size; j++)
- WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
- WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
- }
-
- return 0;
-}
/**
* sdma_v2_4_start - setup and start the async dma engines
@@ -552,13 +517,6 @@ static int sdma_v2_4_start(struct amdgpu_device *adev)
{
int r;
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- r = sdma_v2_4_load_microcode(adev);
- if (r)
- return r;
- }
-
/* halt the engine before programing */
sdma_v2_4_enable(adev, false);
@@ -898,19 +856,19 @@ static int sdma_v2_4_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
@@ -1213,8 +1171,10 @@ static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
@@ -1294,10 +1254,8 @@ static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
{
- if (adev->mman.buffer_funcs == NULL) {
- adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
- adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
- }
+ adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
@@ -1310,16 +1268,16 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ struct drm_gpu_scheduler *sched;
unsigned i;
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->vm_manager.vm_pte_rings[i] =
- &adev->sdma.instance[i].ring;
-
- adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_rqs[i] =
+ &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
}
+ adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v2_4_ip_block =
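
The sdma_v2_4 wptr helpers above convert between ring->wptr, kept in DWORD units, and the RB_WPTR register value via the <<2 / >>2 shifts. A standalone sketch of that conversion, assuming the register encodes the pointer in byte units (illustration only):

#include <stdint.h>

static uint32_t example_wptr_to_reg(uint64_t wptr_dw)
{
	return (uint32_t)(wptr_dw << 2);    /* DWORD index -> register encoding */
}

static uint64_t example_reg_to_wptr(uint32_t reg_val)
{
	return reg_val >> 2;                /* register encoding -> DWORD index */
}
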
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index aa9ab299fd32..6fb3edaba0ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -44,6 +44,8 @@
#include "tonga_sdma_pkt_open.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
+
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -316,14 +318,13 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
if (adev->sdma.instance[i].feature_version >= 20)
adev->sdma.instance[i].burst_nop = true;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
- info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
- info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
- info->fw = adev->sdma.instance[i].fw;
- header = (const struct common_firmware_header *)info->fw->data;
- adev->firmware.fw_size +=
- ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
- }
+ info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
+ info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
+ info->fw = adev->sdma.instance[i].fw;
+ header = (const struct common_firmware_header *)info->fw->data;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
+
}
out:
if (err) {
@@ -365,9 +366,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
/* XXX check if swapping is necessary on BE */
wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;
} else {
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
-
- wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;
+ wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
}
return wptr;
@@ -394,9 +393,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
} else {
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
-
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
}
}
@@ -450,7 +447,7 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
u32 ref_and_mask = 0;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
else
ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);
@@ -780,42 +777,6 @@ static int sdma_v3_0_rlc_resume(struct amdgpu_device *adev)
}
/**
- * sdma_v3_0_load_microcode - load the sDMA ME ucode
- *
- * @adev: amdgpu_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int sdma_v3_0_load_microcode(struct amdgpu_device *adev)
-{
- const struct sdma_firmware_header_v1_0 *hdr;
- const __le32 *fw_data;
- u32 fw_size;
- int i, j;
-
- /* halt the MEs */
- sdma_v3_0_enable(adev, false);
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- if (!adev->sdma.instance[i].fw)
- return -EINVAL;
- hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
- amdgpu_ucode_print_sdma_hdr(&hdr->header);
- fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
- fw_data = (const __le32 *)
- (adev->sdma.instance[i].fw->data +
- le32_to_cpu(hdr->header.ucode_array_offset_bytes));
- WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
- for (j = 0; j < fw_size; j++)
- WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
- WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version);
- }
-
- return 0;
-}
-
-/**
* sdma_v3_0_start - setup and start the async dma engines
*
* @adev: amdgpu_device pointer
@@ -827,12 +788,6 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
{
int r;
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- r = sdma_v3_0_load_microcode(adev);
- if (r)
- return r;
- }
-
/* disable sdma engine before programing it */
sdma_v3_0_ctx_switch_enable(adev, false);
sdma_v3_0_enable(adev, false);
@@ -1179,19 +1134,19 @@ static int sdma_v3_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
/* SDMA Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247,
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,
&adev->sdma.illegal_inst_irq);
if (r)
return r;
@@ -1655,8 +1610,10 @@ static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = {
@@ -1736,10 +1693,8 @@ static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
{
- if (adev->mman.buffer_funcs == NULL) {
- adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
- adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
- }
+ adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
@@ -1752,16 +1707,16 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ struct drm_gpu_scheduler *sched;
unsigned i;
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->vm_manager.vm_pte_rings[i] =
- &adev->sdma.instance[i].ring;
-
- adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_rqs[i] =
+ &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
}
+ adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v3_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index ca53b3fba422..04fa3d972636 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -27,10 +27,10 @@
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
-#include "sdma0/sdma0_4_0_offset.h"
-#include "sdma0/sdma0_4_0_sh_mask.h"
-#include "sdma1/sdma1_4_0_offset.h"
-#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "sdma0/sdma0_4_2_offset.h"
+#include "sdma0/sdma0_4_2_sh_mask.h"
+#include "sdma1/sdma1_4_2_offset.h"
+#include "sdma1/sdma1_4_2_sh_mask.h"
#include "hdp/hdp_4_0_offset.h"
#include "sdma0/sdma0_4_1_default.h"
@@ -38,6 +38,9 @@
#include "soc15.h"
#include "vega10_sdma_pkt_open.h"
+#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+
MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
@@ -45,6 +48,8 @@ MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
+MODULE_FIRMWARE("amdgpu/picasso_sdma.bin");
+MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
@@ -67,6 +72,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
@@ -78,7 +84,8 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_IB_CNTL, 0x800f0100, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
@@ -95,8 +102,7 @@ static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
};
-static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
-{
+static const struct soc15_reg_golden golden_settings_sdma_4_1[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100),
@@ -106,29 +112,73 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
-static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
+static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma0_4_2[] =
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xFE000000, 0x00000000),
+};
+
+static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
- SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xFE000000, 0x00000000),
};
static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
@@ -137,6 +187,12 @@ static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002)
};
+static const struct soc15_reg_golden golden_settings_sdma_rv2[] =
+{
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001)
+};
+
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
u32 instance, u32 offset)
{
@@ -165,16 +221,27 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
break;
case CHIP_VEGA20:
soc15_program_register_sequence(adev,
- golden_settings_sdma_4_2,
- ARRAY_SIZE(golden_settings_sdma_4_2));
+ golden_settings_sdma0_4_2_init,
+ ARRAY_SIZE(golden_settings_sdma0_4_2_init));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma0_4_2,
+ ARRAY_SIZE(golden_settings_sdma0_4_2));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma1_4_2,
+ ARRAY_SIZE(golden_settings_sdma1_4_2));
break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
- golden_settings_sdma_4_1,
- ARRAY_SIZE(golden_settings_sdma_4_1));
- soc15_program_register_sequence(adev,
- golden_settings_sdma_rv1,
- ARRAY_SIZE(golden_settings_sdma_rv1));
+ golden_settings_sdma_4_1,
+ ARRAY_SIZE(golden_settings_sdma_4_1));
+ if (adev->rev_id >= 8)
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_rv2,
+ ARRAY_SIZE(golden_settings_sdma_rv2));
+ else
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_rv1,
+ ARRAY_SIZE(golden_settings_sdma_rv1));
break;
default:
break;
@@ -215,7 +282,12 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
chip_name = "vega20";
break;
case CHIP_RAVEN:
- chip_name = "raven";
+ if (adev->rev_id >= 8)
+ chip_name = "raven2";
+ else if (adev->pdev->device == 0x15d8)
+ chip_name = "picasso";
+ else
+ chip_name = "raven";
break;
default:
BUG();
@@ -296,13 +368,12 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
u32 lowbit, highbit;
- int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
- highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
+ lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
+ highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
- me, highbit, lowbit);
+ ring->me, highbit, lowbit);
wptr = highbit;
wptr = wptr << 32;
wptr |= lowbit;
@@ -339,17 +410,15 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
- int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
-
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
- me,
+ ring->me,
lower_32_bits(ring->wptr << 2),
- me,
+ ring->me,
upper_32_bits(ring->wptr << 2));
- WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
- WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+ WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
}
}
@@ -430,7 +499,7 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
- if (ring == &ring->adev->sdma.instance[0].ring)
+ if (ring->me == 0)
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;
else
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
@@ -751,7 +820,7 @@ sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
uint32_t def, data;
if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) {
- /* disable idle interrupt */
+ /* enable idle interrupt */
def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL));
data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK;
@@ -1228,13 +1297,13 @@ static int sdma_v4_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,
&adev->sdma.trap_irq);
if (r)
return r;
@@ -1253,9 +1322,15 @@ static int sdma_v4_0_sw_init(void *handle)
DRM_INFO("use_doorbell being set to: [%s]\n",
ring->use_doorbell?"true":"false");
- ring->doorbell_index = (i == 0) ?
- (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset
- : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset
+ if (adev->asic_type == CHIP_VEGA10)
+ ring->doorbell_index = (i == 0) ?
+ (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset
+ : (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset
+ else
+ ring->doorbell_index = (i == 0) ?
+ (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset
+ : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset
+
sprintf(ring->name, "sdma%d", i);
r = amdgpu_ring_init(adev, ring, 1024,
@@ -1291,6 +1366,9 @@ static int sdma_v4_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
+
sdma_v4_0_init_golden_registers(adev);
r = sdma_v4_0_start(adev);
@@ -1308,6 +1386,9 @@ static int sdma_v4_0_hw_fini(void *handle)
sdma_v4_0_ctx_switch_enable(adev, false);
sdma_v4_0_enable(adev, false);
+ if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+ amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
+
return 0;
}
@@ -1651,8 +1732,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->sdma.num_instances; i++)
+ for (i = 0; i < adev->sdma.num_instances; i++) {
adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
+ adev->sdma.instance[i].ring.me = i;
+ }
}
static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = {
@@ -1732,10 +1815,8 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = {
static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
{
- if (adev->mman.buffer_funcs == NULL) {
- adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
- adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
- }
+ adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
@@ -1748,16 +1829,16 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ struct drm_gpu_scheduler *sched;
unsigned i;
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->vm_manager.vm_pte_rings[i] =
- &adev->sdma.instance[i].ring;
-
- adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_rqs[i] =
+ &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
}
+ adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index c364ef94cc36..f8408f88cd37 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -2057,13 +2057,13 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
- amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
/* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
@@ -2071,13 +2071,14 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
- amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
+
/* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */
/* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */
break;
@@ -2085,11 +2086,11 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
- amdgpu_device_ip_block_add(adev, &si_dma_ip_block);
break;
default:
BUG();
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index b75d901ba3c4..adbaea6da0d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -502,12 +502,14 @@ static int si_dma_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* DMA0 trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
+ &adev->sdma.trap_irq);
if (r)
return r;
/* DMA1 trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
+ &adev->sdma.trap_irq);
if (r)
return r;
@@ -649,17 +651,10 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- amdgpu_fence_process(&adev->sdma.instance[0].ring);
-
- return 0;
-}
-
-static int si_dma_process_trap_irq_1(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- amdgpu_fence_process(&adev->sdma.instance[1].ring);
-
+ if (entry->src_id == 224)
+ amdgpu_fence_process(&adev->sdma.instance[0].ring);
+ else
+ amdgpu_fence_process(&adev->sdma.instance[1].ring);
return 0;
}
@@ -786,11 +781,6 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
.process = si_dma_process_trap_irq,
};
-static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = {
- .set = si_dma_set_trap_irq_state,
- .process = si_dma_process_trap_irq_1,
-};
-
static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
.process = si_dma_process_illegal_inst_irq,
};
@@ -799,7 +789,6 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
- adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1;
adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
}
@@ -863,10 +852,8 @@ static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
{
- if (adev->mman.buffer_funcs == NULL) {
- adev->mman.buffer_funcs = &si_dma_buffer_funcs;
- adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
- }
+ adev->mman.buffer_funcs = &si_dma_buffer_funcs;
+ adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
@@ -879,16 +866,16 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ struct drm_gpu_scheduler *sched;
unsigned i;
- if (adev->vm_manager.vm_pte_funcs == NULL) {
- adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
- for (i = 0; i < adev->sdma.num_instances; i++)
- adev->vm_manager.vm_pte_rings[i] =
- &adev->sdma.instance[i].ring;
-
- adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
+ adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ sched = &adev->sdma.instance[i].ring.sched;
+ adev->vm_manager.vm_pte_rqs[i] =
+ &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
}
+ adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version si_dma_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 5c97a3671726..da58040fdbdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -56,16 +56,16 @@
#define BIOS_SCRATCH_4 0x5cd
-MODULE_FIRMWARE("radeon/tahiti_smc.bin");
-MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
-MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
-MODULE_FIRMWARE("radeon/verde_smc.bin");
-MODULE_FIRMWARE("radeon/verde_k_smc.bin");
-MODULE_FIRMWARE("radeon/oland_smc.bin");
-MODULE_FIRMWARE("radeon/oland_k_smc.bin");
-MODULE_FIRMWARE("radeon/hainan_smc.bin");
-MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
-MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
+MODULE_FIRMWARE("amdgpu/tahiti_smc.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_smc.bin");
+MODULE_FIRMWARE("amdgpu/pitcairn_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/verde_smc.bin");
+MODULE_FIRMWARE("amdgpu/verde_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/oland_smc.bin");
+MODULE_FIRMWARE("amdgpu/oland_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/hainan_smc.bin");
+MODULE_FIRMWARE("amdgpu/hainan_k_smc.bin");
+MODULE_FIRMWARE("amdgpu/banks_k_2_smc.bin");
static const struct amd_pm_funcs si_dpm_funcs;
@@ -3480,7 +3480,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
disable_sclk_switching = true;
}
- if (adev->pm.dpm.ac_power)
+ if (adev->pm.ac_power)
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
else
max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
@@ -3489,7 +3489,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
if (ps->performance_levels[i].vddc > ps->performance_levels[i+1].vddc)
ps->performance_levels[i].vddc = ps->performance_levels[i+1].vddc;
}
- if (adev->pm.dpm.ac_power == false) {
+ if (adev->pm.ac_power == false) {
for (i = 0; i < ps->performance_level_count; i++) {
if (ps->performance_levels[i].mclk > max_limits->mclk)
ps->performance_levels[i].mclk = max_limits->mclk;
@@ -6887,7 +6887,6 @@ static int si_dpm_enable(struct amdgpu_device *adev)
si_enable_auto_throttle_source(adev, AMDGPU_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
si_thermal_start_thermal_controller(adev);
- ni_update_current_ps(adev, boot_ps);
return 0;
}
@@ -7318,8 +7317,7 @@ static int si_dpm_init(struct amdgpu_device *adev)
pi = &eg_pi->rv7xx;
si_pi->sys_pcie_mask =
- (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
- CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
+ adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK;
si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev);
@@ -7667,7 +7665,7 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev)
default: BUG();
}
- snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+ snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);
err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
if (err)
goto out;
@@ -7689,11 +7687,11 @@ static int si_dpm_sw_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq);
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq);
if (ret)
return ret;
- ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq);
+ ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq);
if (ret)
return ret;
@@ -7764,7 +7762,7 @@ static int si_dpm_hw_init(void *handle)
else
adev->pm.dpm_enabled = true;
mutex_unlock(&adev->pm.mutex);
-
+ amdgpu_pm_compute_clocks(adev);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
index dc9e0e6b4558..790ba46eaebb 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -46,6 +46,26 @@
#define GRPH_ENDIAN_8IN16 1
#define GRPH_ENDIAN_8IN32 2
#define GRPH_ENDIAN_8IN64 3
+#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
+#define GRPH_RED_SEL_R 0
+#define GRPH_RED_SEL_G 1
+#define GRPH_RED_SEL_B 2
+#define GRPH_RED_SEL_A 3
+#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
+#define GRPH_GREEN_SEL_G 0
+#define GRPH_GREEN_SEL_B 1
+#define GRPH_GREEN_SEL_A 2
+#define GRPH_GREEN_SEL_R 3
+#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
+#define GRPH_BLUE_SEL_B 0
+#define GRPH_BLUE_SEL_A 1
+#define GRPH_BLUE_SEL_R 2
+#define GRPH_BLUE_SEL_G 3
+#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
+#define GRPH_ALPHA_SEL_A 0
+#define GRPH_ALPHA_SEL_R 1
+#define GRPH_ALPHA_SEL_G 2
+#define GRPH_ALPHA_SEL_B 3
#define GRPH_DEPTH(x) (((x) & 0x3) << 0)
#define GRPH_DEPTH_8BPP 0
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 60dad63098a2..b3d7d9f83202 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -142,7 +142,7 @@ static void si_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
- entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
@@ -170,7 +170,7 @@ static int si_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_ih_ring_init(adev, 64 * 1024, false);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false);
if (r)
return r;
@@ -182,7 +182,7 @@ static int si_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
return 0;
}
@@ -308,8 +308,7 @@ static const struct amdgpu_ih_funcs si_ih_funcs = {
static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
- if (adev->irq.ih_funcs == NULL)
- adev->irq.ih_funcs = &si_ih_funcs;
+ adev->irq.ih_funcs = &si_ih_funcs;
}
const struct amdgpu_ip_block_version si_ih_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h
index c57eff159374..7cf12adb3915 100644
--- a/drivers/gpu/drm/amd/amdgpu/sid.h
+++ b/drivers/gpu/drm/amd/amdgpu/sid.h
@@ -2201,6 +2201,26 @@
# define EVERGREEN_GRPH_ENDIAN_8IN16 1
# define EVERGREEN_GRPH_ENDIAN_8IN32 2
# define EVERGREEN_GRPH_ENDIAN_8IN64 3
+#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4)
+# define EVERGREEN_GRPH_RED_SEL_R 0
+# define EVERGREEN_GRPH_RED_SEL_G 1
+# define EVERGREEN_GRPH_RED_SEL_B 2
+# define EVERGREEN_GRPH_RED_SEL_A 3
+#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
+# define EVERGREEN_GRPH_GREEN_SEL_G 0
+# define EVERGREEN_GRPH_GREEN_SEL_B 1
+# define EVERGREEN_GRPH_GREEN_SEL_A 2
+# define EVERGREEN_GRPH_GREEN_SEL_R 3
+#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8)
+# define EVERGREEN_GRPH_BLUE_SEL_B 0
+# define EVERGREEN_GRPH_BLUE_SEL_A 1
+# define EVERGREEN_GRPH_BLUE_SEL_R 2
+# define EVERGREEN_GRPH_BLUE_SEL_G 3
+#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10)
+# define EVERGREEN_GRPH_ALPHA_SEL_A 0
+# define EVERGREEN_GRPH_ALPHA_SEL_R 1
+# define EVERGREEN_GRPH_ALPHA_SEL_G 2
+# define EVERGREEN_GRPH_ALPHA_SEL_B 3
#define EVERGREEN_D3VGA_CONTROL 0xf8
#define EVERGREEN_D4VGA_CONTROL 0xf9
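
For reference, a minimal standalone sketch of what the new EVERGREEN_GRPH_*_CROSSBAR macros compose: only the bit layout and SEL values come from the definitions above; the idea of OR-ing them into a swizzle word and the little harness around it are illustrative, not taken from the driver.

	#include <stdio.h>

	/* Bit layout copied from the sid.h additions above. */
	#define EVERGREEN_GRPH_RED_CROSSBAR(x)   (((x) & 0x3) << 4)
	#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6)
	#define EVERGREEN_GRPH_BLUE_CROSSBAR(x)  (((x) & 0x3) << 8)
	#define EVERGREEN_GRPH_RED_SEL_B   2
	#define EVERGREEN_GRPH_GREEN_SEL_G 0
	#define EVERGREEN_GRPH_BLUE_SEL_R  2

	int main(void)
	{
		/* Swap the red and blue channels, keep green in place. */
		unsigned int swizzle =
			EVERGREEN_GRPH_RED_CROSSBAR(EVERGREEN_GRPH_RED_SEL_B) |
			EVERGREEN_GRPH_GREEN_CROSSBAR(EVERGREEN_GRPH_GREEN_SEL_G) |
			EVERGREEN_GRPH_BLUE_CROSSBAR(EVERGREEN_GRPH_BLUE_SEL_R);

		printf("crossbar bits: 0x%03x\n", swizzle); /* prints 0x220 */
		return 0;
	}
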
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 83f2717fcf81..bf5e6a413dee 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -479,6 +479,11 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block =
.funcs = &soc15_common_ip_funcs,
};
+static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
+{
+ return adev->nbio_funcs->get_rev_id(adev);
+}
+
int soc15_set_ip_blocks(struct amdgpu_device *adev)
{
/* Set IP register base before any HW register access */
@@ -498,7 +503,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
adev->nbio_funcs = &nbio_v7_0_funcs;
else if (adev->asic_type == CHIP_VEGA20)
- adev->nbio_funcs = &nbio_v7_0_funcs;
+ adev->nbio_funcs = &nbio_v7_4_funcs;
else
adev->nbio_funcs = &nbio_v6_1_funcs;
@@ -506,6 +511,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
adev->df_funcs = &df_v3_6_funcs;
else
adev->df_funcs = &df_v1_7_funcs;
+
+ adev->rev_id = soc15_get_rev_id(adev);
adev->nbio_funcs->detect_hw_virt(adev);
if (amdgpu_sriov_vf(adev))
@@ -518,11 +525,14 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- if (adev->asic_type != CHIP_VEGA20) {
+ if (adev->asic_type == CHIP_VEGA20)
+ amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+ else
amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
- if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
- }
+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -531,16 +541,18 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
#else
# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15."
#endif
- amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
- amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
+ if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev))) {
+ amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block);
+ }
break;
case CHIP_RAVEN:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -550,8 +562,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
#else
# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15."
#endif
- amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block);
break;
default:
@@ -561,11 +571,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
return 0;
}
-static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
-{
- return adev->nbio_funcs->get_rev_id(adev);
-}
-
static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
adev->nbio_funcs->hdp_flush(adev, ring);
@@ -622,7 +627,6 @@ static int soc15_common_early_init(void *handle)
adev->asic_funcs = &soc15_asic_funcs;
- adev->rev_id = soc15_get_rev_id(adev);
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
case CHIP_VEGA10:
@@ -693,35 +697,79 @@ static int soc15_common_early_init(void *handle)
adev->external_rev_id = adev->rev_id + 0x28;
break;
case CHIP_RAVEN:
- adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
- AMD_CG_SUPPORT_GFX_MGLS |
- AMD_CG_SUPPORT_GFX_RLC_LS |
- AMD_CG_SUPPORT_GFX_CP_LS |
- AMD_CG_SUPPORT_GFX_3D_CGCG |
- AMD_CG_SUPPORT_GFX_3D_CGLS |
- AMD_CG_SUPPORT_GFX_CGCG |
- AMD_CG_SUPPORT_GFX_CGLS |
- AMD_CG_SUPPORT_BIF_MGCG |
- AMD_CG_SUPPORT_BIF_LS |
- AMD_CG_SUPPORT_HDP_MGCG |
- AMD_CG_SUPPORT_HDP_LS |
- AMD_CG_SUPPORT_DRM_MGCG |
- AMD_CG_SUPPORT_DRM_LS |
- AMD_CG_SUPPORT_ROM_MGCG |
- AMD_CG_SUPPORT_MC_MGCG |
- AMD_CG_SUPPORT_MC_LS |
- AMD_CG_SUPPORT_SDMA_MGCG |
- AMD_CG_SUPPORT_SDMA_LS |
- AMD_CG_SUPPORT_VCN_MGCG;
-
- adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+ if (adev->rev_id >= 0x8)
+ adev->external_rev_id = adev->rev_id + 0x81;
+ else if (adev->pdev->device == 0x15d8)
+ adev->external_rev_id = adev->rev_id + 0x41;
+ else
+ adev->external_rev_id = 0x1;
+
+ if (adev->rev_id >= 0x8) {
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+ } else if (adev->pdev->device == 0x15d8) {
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS;
+
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA |
+ AMD_PG_SUPPORT_MMHUB |
+ AMD_PG_SUPPORT_VCN |
+ AMD_PG_SUPPORT_VCN_DPG;
+ } else {
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_RLC_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_DRM_MGCG |
+ AMD_CG_SUPPORT_DRM_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_VCN_MGCG;
+
+ adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+ }
if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_RLC_SMU_HS;
-
- adev->external_rev_id = 0x1;
break;
default:
/* FIXME: not supported yet */
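
A minimal standalone sketch of the CHIP_RAVEN revision mapping introduced above, assuming only what the hunk shows: rev_id >= 8 selects the +0x81 range, PCI device 0x15d8 selects the +0x41 range, and everything else maps to 0x1. The helper name and the test harness are made up for illustration.

	#include <stdio.h>

	/* Mirrors the branch ordering in the soc15_common_early_init hunk. */
	static unsigned int raven_external_rev_id(unsigned int rev_id,
						  unsigned int pci_device)
	{
		if (rev_id >= 0x8)
			return rev_id + 0x81;	/* Raven2 range */
		else if (pci_device == 0x15d8)
			return rev_id + 0x41;	/* Picasso range */
		else
			return 0x1;		/* original Raven */
	}

	int main(void)
	{
		printf("rev 0x8, any device:  0x%x\n", raven_external_rev_id(0x8, 0x0000));
		printf("rev 0x0, dev 0x15d8:  0x%x\n", raven_external_rev_id(0x0, 0x15d8));
		printf("rev 0x0, other dev:   0x%x\n", raven_external_rev_id(0x0, 0x0000));
		return 0;
	}
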
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 1f714b7af520..f8ad7804dc40 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -26,6 +26,7 @@
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
+#include "nbio_v7_4.h"
#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4
#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 0942f492d2e1..958b10a57073 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -56,12 +56,34 @@
tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
loop--; \
if (!loop) { \
+ DRM_ERROR("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \
+ inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \
ret = -ETIMEDOUT; \
break; \
} \
} \
} while (0)
+#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \
+ ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__MASK_EN_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); })
+
+#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \
+ do { \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
+ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \
+ UVD_DPG_LMA_CTL__READ_WRITE_MASK | \
+ ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
+ << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
+ (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+ } while (0)
+
#endif
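
The new *_SOC15_DPG_MODE macros wrap an indirect (windowed) register access: the target offset, a mask and an SRAM select are programmed through the LMA control/mask registers, and the payload moves through LMA_DATA. A standalone sketch of that pattern follows; the offsets, shifts and the mmio[] backing store are hypothetical stand-ins, not the real UVD register map.

	#include <stdint.h>
	#include <stdio.h>

	#define LMA_DATA		0x10	/* hypothetical offsets */
	#define LMA_MASK		0x11
	#define LMA_CTL			0x12
	#define LMA_CTL_READ_WRITE	(1u << 0)
	#define LMA_CTL_ADDR_SHIFT	16
	#define LMA_CTL_SRAM_SEL_SHIFT	4

	static uint32_t mmio[256];
	static void wreg(uint32_t reg, uint32_t val) { mmio[reg] = val; }
	static uint32_t rreg(uint32_t reg) { return mmio[reg]; }

	/* Write 'value' (under 'mask') to an indirect register 'reg'. */
	static void dpg_write(uint32_t reg, uint32_t value, uint32_t mask,
			      uint32_t sram_sel)
	{
		wreg(LMA_DATA, value);
		wreg(LMA_MASK, mask);
		wreg(LMA_CTL, LMA_CTL_READ_WRITE |
			      (reg << LMA_CTL_ADDR_SHIFT) |
			      (sram_sel << LMA_CTL_SRAM_SEL_SHIFT));
	}

	int main(void)
	{
		dpg_write(0x42, 0xdeadbeef, 0xffffffff, 1);
		printf("data=0x%08x ctl=0x%08x\n", rreg(LMA_DATA), rreg(LMA_CTL));
		return 0;
	}
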
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 8dc29107228f..edfe50821cd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -53,6 +53,29 @@
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+#define PACKETJ_CONDITION_CHECK0 0
+#define PACKETJ_CONDITION_CHECK1 1
+#define PACKETJ_CONDITION_CHECK2 2
+#define PACKETJ_CONDITION_CHECK3 3
+#define PACKETJ_CONDITION_CHECK4 4
+#define PACKETJ_CONDITION_CHECK5 5
+#define PACKETJ_CONDITION_CHECK6 6
+#define PACKETJ_CONDITION_CHECK7 7
+
+#define PACKETJ_TYPE0 0
+#define PACKETJ_TYPE1 1
+#define PACKETJ_TYPE2 2
+#define PACKETJ_TYPE3 3
+#define PACKETJ_TYPE4 4
+#define PACKETJ_TYPE5 5
+#define PACKETJ_TYPE6 6
+#define PACKETJ_TYPE7 7
+
+#define PACKETJ(reg, r, cond, type) ((reg & 0x3FFFF) | \
+ ((r & 0x3F) << 18) | \
+ ((cond & 0xF) << 24) | \
+ ((type & 0xF) << 28))
+
/* Packet 3 types */
#define PACKET3_NOP 0x10
#define PACKET3_SET_BASE 0x11
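
A standalone sketch of what the new PACKETJ() macro packs into a single dword header: bits [17:0] register offset, [23:18] r, [27:24] condition, [31:28] type. The field layout is taken from the definition above (with extra parentheses around the arguments); the register offset used in main() is arbitrary.

	#include <stdio.h>

	#define PACKETJ(reg, r, cond, type) (((reg) & 0x3FFFF) | \
					     (((r) & 0x3F) << 18) | \
					     (((cond) & 0xF) << 24) | \
					     (((type) & 0xF) << 28))
	#define PACKETJ_CONDITION_CHECK0 0
	#define PACKETJ_TYPE0 0
	#define PACKETJ_TYPE6 6

	int main(void)
	{
		/* A type-0 header addressing an arbitrary register offset... */
		printf("type0: 0x%08x\n",
		       PACKETJ(0x1234, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
		/* ...and a header with only the type field set. */
		printf("type6: 0x%08x\n", PACKETJ(0, 0, 0, PACKETJ_TYPE6));
		return 0;
	}
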
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 52853d8a8fdd..3abffd06b5c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -266,7 +266,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev,
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
- entry->client_id = AMDGPU_IH_CLIENTID_LEGACY;
+ entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
@@ -317,7 +317,7 @@ static int tonga_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_ih_ring_init(adev, 64 * 1024, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true);
if (r)
return r;
@@ -334,7 +334,7 @@ static int tonga_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev);
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
amdgpu_irq_remove_domain(adev);
return 0;
@@ -513,8 +513,7 @@ static const struct amdgpu_ih_funcs tonga_ih_funcs = {
static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
- if (adev->irq.ih_funcs == NULL)
- adev->irq.ih_funcs = &tonga_ih_funcs;
+ adev->irq.ih_funcs = &tonga_ih_funcs;
}
const struct amdgpu_ip_block_version tonga_ih_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 6fed3d7797a8..1fc17bf39fed 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -108,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle)
int r;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
if (r)
return r;
@@ -123,6 +123,10 @@ static int uvd_v4_2_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_entity_init(adev);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 341ee6d55ce8..fde6ad5ac9ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -35,6 +35,7 @@
#include "vi.h"
#include "smu/smu_7_1_2_d.h"
#include "smu/smu_7_1_2_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -104,7 +105,7 @@ static int uvd_v5_0_sw_init(void *handle)
int r;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
if (r)
return r;
@@ -119,6 +120,10 @@ static int uvd_v5_0_sw_init(void *handle)
ring = &adev->uvd.inst->ring;
sprintf(ring->name, "uvd");
r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+ if (r)
+ return r;
+
+ r = amdgpu_uvd_entity_init(adev);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index bfddf97dd13e..7a5b40275e8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -36,6 +36,7 @@
#include "bif/bif_5_1_d.h"
#include "gmc/gmc_8_1_d.h"
#include "vi.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
/* Polaris10/11/12 firmware version */
#define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8))
@@ -247,12 +248,10 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -275,7 +274,7 @@ err:
*/
static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint32_t handle,
- bool direct, struct dma_fence **fence)
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
@@ -311,19 +310,9 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
- if (r)
- goto err;
-
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err;
- }
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
if (fence)
*fence = dma_fence_get(f);
@@ -352,7 +341,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto error;
}
- r = uvd_v6_0_enc_get_destroy_msg(ring, 1, true, &fence);
+ r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
goto error;
@@ -400,14 +389,14 @@ static int uvd_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);
if (r)
return r;
/* UVD ENC TRAP */
if (uvd_v6_0_enc_support(adev)) {
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.inst->irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq);
if (r)
return r;
}
@@ -425,16 +414,6 @@ static int uvd_v6_0_sw_init(void *handle)
adev->uvd.num_enc_rings = 0;
DRM_INFO("UVD ENC is disabled\n");
- } else {
- struct drm_sched_rq *rq;
- ring = &adev->uvd.inst->ring_enc[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc,
- rq, NULL);
- if (r) {
- DRM_ERROR("Failed setting up UVD ENC run queue.\n");
- return r;
- }
}
r = amdgpu_uvd_resume(adev);
@@ -457,6 +436,8 @@ static int uvd_v6_0_sw_init(void *handle)
}
}
+ r = amdgpu_uvd_entity_init(adev);
+
return r;
}
@@ -470,8 +451,6 @@ static int uvd_v6_0_sw_fini(void *handle)
return r;
if (uvd_v6_0_enc_support(adev)) {
- drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
-
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
}
@@ -1569,7 +1548,6 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_UVD,
.align_mask = 0xf,
- .nop = PACKET0(mmUVD_NO_OP, 0),
.support_64bit_ptrs = false,
.get_rptr = uvd_v6_0_ring_get_rptr,
.get_wptr = uvd_v6_0_ring_get_wptr,
@@ -1587,7 +1565,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = uvd_v6_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 57d32f21b3a6..58b39afcfb86 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -39,6 +39,13 @@
#include "hdp/hdp_4_0_offset.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
+#include "ivsrcid/uvd/irqsrcs_uvd_7_0.h"
+
+#define mmUVD_PG0_CC_UVD_HARVESTING 0x00c7
+#define mmUVD_PG0_CC_UVD_HARVESTING_BASE_IDX 1
+//UVD_PG0_CC_UVD_HARVESTING
+#define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE__SHIFT 0x1
+#define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK 0x00000002L
#define UVD7_MAX_HW_INSTANCES_VEGA20 2
@@ -249,12 +256,10 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
+ r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
- amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@@ -275,8 +280,8 @@ err:
*
* Close up a stream for HW test or if userspace failed to do so
*/
-int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- bool direct, struct dma_fence **fence)
+static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
@@ -312,19 +317,9 @@ int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
- job->fence = dma_fence_get(f);
- if (r)
- goto err;
-
- amdgpu_job_free(job);
- } else {
- r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
- AMDGPU_FENCE_OWNER_UNDEFINED, &f);
- if (r)
- goto err;
- }
+ r = amdgpu_job_submit_direct(job, ring, &f);
+ if (r)
+ goto err;
if (fence)
*fence = dma_fence_get(f);
@@ -353,7 +348,7 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
goto error;
}
- r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence);
+ r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
goto error;
@@ -377,10 +372,25 @@ error:
static int uvd_v7_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (adev->asic_type == CHIP_VEGA20)
+
+ if (adev->asic_type == CHIP_VEGA20) {
+ u32 harvest;
+ int i;
+
adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20;
- else
+ for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ harvest = RREG32_SOC15(UVD, i, mmUVD_PG0_CC_UVD_HARVESTING);
+ if (harvest & UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK) {
+ adev->uvd.harvest_config |= 1 << i;
+ }
+ }
+ if (adev->uvd.harvest_config == (AMDGPU_UVD_HARVEST_UVD0 |
+ AMDGPU_UVD_HARVEST_UVD1))
+ /* both instances are harvested, disable the block */
+ return -ENOENT;
+ } else {
adev->uvd.num_uvd_inst = 1;
+ }
if (amdgpu_sriov_vf(adev))
adev->uvd.num_enc_rings = 1;
@@ -396,19 +406,21 @@ static int uvd_v7_0_early_init(void *handle)
static int uvd_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
- struct drm_sched_rq *rq;
+
int i, j, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124, &adev->uvd.inst[j].irq);
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], UVD_7_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->uvd.inst[j].irq);
if (r)
return r;
/* UVD ENC TRAP */
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + 119, &adev->uvd.inst[j].irq);
+ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + UVD_7_0__SRCID__UVD_ENC_GEN_PURP, &adev->uvd.inst[j].irq);
if (r)
return r;
}
@@ -425,18 +437,14 @@ static int uvd_v7_0_sw_init(void *handle)
adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].fw = adev->uvd.fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
- DRM_INFO("PSP loading UVD firmware\n");
- }
- for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
- ring = &adev->uvd.inst[j].ring_enc[0];
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
- r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc,
- rq, NULL);
- if (r) {
- DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j);
- return r;
+ if (adev->uvd.num_uvd_inst == UVD7_MAX_HW_INSTANCES_VEGA20) {
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].ucode_id = AMDGPU_UCODE_ID_UVD1;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].fw = adev->uvd.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
}
+ DRM_INFO("PSP loading UVD firmware\n");
}
r = amdgpu_uvd_resume(adev);
@@ -444,6 +452,8 @@ static int uvd_v7_0_sw_init(void *handle)
return r;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
if (!amdgpu_sriov_vf(adev)) {
ring = &adev->uvd.inst[j].ring;
sprintf(ring->name, "uvd<%d>", j);
@@ -472,6 +482,10 @@ static int uvd_v7_0_sw_init(void *handle)
}
}
+ r = amdgpu_uvd_entity_init(adev);
+ if (r)
+ return r;
+
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -491,8 +505,8 @@ static int uvd_v7_0_sw_fini(void *handle)
return r;
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
- drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
-
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
for (i = 0; i < adev->uvd.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
}
@@ -521,6 +535,8 @@ static int uvd_v7_0_hw_init(void *handle)
goto done;
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
ring = &adev->uvd.inst[j].ring;
if (!amdgpu_sriov_vf(adev)) {
@@ -600,8 +616,11 @@ static int uvd_v7_0_hw_fini(void *handle)
DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
}
- for (i = 0; i < adev->uvd.num_uvd_inst; ++i)
+ for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
adev->uvd.inst[i].ring.ready = false;
+ }
return 0;
}
@@ -644,11 +663,18 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ i == 0 ?
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo:
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo);
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+ i == 0 ?
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi:
+ adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi);
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
offset = 0;
} else {
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
@@ -656,10 +682,10 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
upper_32_bits(adev->uvd.inst[i].gpu_addr));
offset = size;
+ WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
}
- WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
@@ -716,6 +742,8 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
adev->uvd.inst[i].ring_enc[0].wptr = 0;
@@ -772,6 +800,8 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
init_table += header->uvd_table_offset;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
ring = &adev->uvd.inst[i].ring;
ring->wptr = 0;
size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
@@ -911,6 +941,8 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
int i, j, k, r;
for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ if (adev->uvd.harvest_config & (1 << k))
+ continue;
/* disable DPG */
WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
@@ -923,6 +955,8 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)
uvd_v7_0_mc_resume(adev);
for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+ if (adev->uvd.harvest_config & (1 << k))
+ continue;
ring = &adev->uvd.inst[k].ring;
/* disable clock gating */
WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
@@ -1090,6 +1124,8 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev)
uint8_t i = 0;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
/* force RBC into idle state */
WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
@@ -1227,6 +1263,35 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
}
/**
+ * uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission.
+ *
+ * @p: the CS parser with the IBs
+ * @ib_idx: which IB to patch
+ *
+ */
+static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
+ uint32_t ib_idx)
+{
+ struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
+ struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
+ unsigned i;
+
+ /* No patching necessary for the first instance */
+ if (!ring->me)
+ return 0;
+
+ for (i = 0; i < ib->length_dw; i += 2) {
+ uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
+
+ reg -= p->adev->reg_offset[UVD_HWIP][0][1];
+ reg += p->adev->reg_offset[UVD_HWIP][1][1];
+
+ amdgpu_set_ib_value(p, ib_idx, i, reg);
+ }
+ return 0;
+}
+
+/**
* uvd_v7_0_ring_emit_ib - execute indirect buffer
*
* @ring: amdgpu_ring pointer
@@ -1718,6 +1783,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.get_rptr = uvd_v7_0_ring_get_rptr,
.get_wptr = uvd_v7_0_ring_get_wptr,
.set_wptr = uvd_v7_0_ring_set_wptr,
+ .patch_cs_in_place = uvd_v7_0_ring_patch_cs_in_place,
.emit_frame_size =
6 + /* hdp invalidate */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@@ -1777,6 +1843,8 @@ static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
adev->uvd.inst[i].ring.me = i;
DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
@@ -1788,6 +1856,8 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
int i, j;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+ if (adev->uvd.harvest_config & (1 << j))
+ continue;
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
adev->uvd.inst[j].ring_enc[i].me = j;
@@ -1807,6 +1877,8 @@ static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+ if (adev->uvd.harvest_config & (1 << i))
+ continue;
adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
}
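
The uvd_v7_0 changes above record harvested UVD instances in a per-instance bitmask (harvest_config) and skip those bits in every per-instance loop, bailing out entirely when all instances are gone. A minimal standalone sketch of that pattern; the instance count and which bit is set are made up for the example.

	#include <stdio.h>

	#define MAX_INST 2

	int main(void)
	{
		unsigned int harvest_config = 0;
		int i;

		/* Pretend instance 1 reported itself as harvested. */
		harvest_config |= 1 << 1;

		/* Every bit set would mean the whole block is unusable
		 * (the early_init hunk returns -ENOENT in that case). */
		if (harvest_config == ((1 << 0) | (1 << 1))) {
			printf("all instances harvested\n");
			return 0;
		}

		for (i = 0; i < MAX_INST; i++) {
			if (harvest_config & (1 << i))
				continue;	/* skip harvested instance */
			printf("initializing UVD instance %d\n", i);
		}
		return 0;
	}
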
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 47f70827195b..ea28828360d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -56,7 +56,7 @@ static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(mmVCE_RB_RPTR);
else
return RREG32(mmVCE_RB_RPTR2);
@@ -73,7 +73,7 @@ static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(mmVCE_RB_WPTR);
else
return RREG32(mmVCE_RB_WPTR2);
@@ -90,7 +90,7 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
else
WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
@@ -417,7 +417,7 @@ static int vce_v2_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* VCE */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq);
if (r)
return r;
@@ -439,6 +439,8 @@ static int vce_v2_0_sw_init(void *handle)
return r;
}
+ r = amdgpu_vce_entity_init(adev);
+
return r;
}
@@ -627,8 +629,10 @@ static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v2_0_ring_funcs;
+ adev->vce.ring[i].me = i;
+ }
}
static const struct amdgpu_irq_src_funcs vce_v2_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index a71b97519cc0..6dbd39730070 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -39,6 +39,7 @@
#include "smu/smu_7_1_2_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_sh_mask.h"
+#include "ivsrcid/ivsrcid_vislands30.h"
#define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04
@@ -86,9 +87,9 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
v = RREG32(mmVCE_RB_RPTR);
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
v = RREG32(mmVCE_RB_RPTR2);
else
v = RREG32(mmVCE_RB_RPTR3);
@@ -118,9 +119,9 @@ static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)
else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
v = RREG32(mmVCE_RB_WPTR);
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
v = RREG32(mmVCE_RB_WPTR2);
else
v = RREG32(mmVCE_RB_WPTR3);
@@ -149,9 +150,9 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));
else
WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr));
@@ -422,7 +423,7 @@ static int vce_v3_0_sw_init(void *handle)
int r, i;
/* VCE */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);
if (r)
return r;
@@ -447,6 +448,8 @@ static int vce_v3_0_sw_init(void *handle)
return r;
}
+ r = amdgpu_vce_entity_init(adev);
+
return r;
}
@@ -942,12 +945,16 @@ static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
int i;
if (adev->asic_type >= CHIP_STONEY) {
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs;
+ adev->vce.ring[i].me = i;
+ }
DRM_INFO("VCE enabled in VM mode\n");
} else {
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs;
+ adev->vce.ring[i].me = i;
+ }
DRM_INFO("VCE enabled in physical mode\n");
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 8fd1b742985a..1c9471890bf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -39,6 +39,8 @@
#include "mmhub/mmhub_1_0_offset.h"
#include "mmhub/mmhub_1_0_sh_mask.h"
+#include "ivsrcid/vce/irqsrcs_vce_4_0.h"
+
#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK 0x02
#define VCE_V4_0_FW_SIZE (384 * 1024)
@@ -60,9 +62,9 @@ static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));
else
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3));
@@ -82,9 +84,9 @@ static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell)
return adev->wb.wb[ring->wptr_offs];
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));
else
return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3));
@@ -108,10 +110,10 @@ static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
return;
}
- if (ring == &adev->vce.ring[0])
+ if (ring->me == 0)
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),
lower_32_bits(ring->wptr));
- else if (ring == &adev->vce.ring[1])
+ else if (ring->me == 1)
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),
lower_32_bits(ring->wptr));
else
@@ -417,6 +419,7 @@ static int vce_v4_0_sw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
+
unsigned size;
int r, i;
@@ -436,7 +439,7 @@ static int vce_v4_0_sw_init(void *handle)
const struct common_firmware_header *hdr;
unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
- adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
+ adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
if (!adev->vce.saved_bo)
return -ENOMEM;
@@ -472,6 +475,11 @@ static int vce_v4_0_sw_init(void *handle)
return r;
}
+
+ r = amdgpu_vce_entity_init(adev);
+ if (r)
+ return r;
+
r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
@@ -488,7 +496,7 @@ static int vce_v4_0_sw_fini(void *handle)
amdgpu_virt_free_mm_table(adev);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
- kfree(adev->vce.saved_bo);
+ kvfree(adev->vce.saved_bo);
adev->vce.saved_bo = NULL;
}
@@ -593,6 +601,7 @@ static int vce_v4_0_resume(void *handle)
static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
{
uint32_t offset, size;
+ uint64_t tmr_mc_addr;
WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16));
WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000);
@@ -605,21 +614,25 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
+ offset = AMDGPU_VCE_FIRMWARE_OFFSET;
+
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 |
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8));
+ (tmr_mc_addr >> 8));
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
+ (tmr_mc_addr >> 40) & 0xff);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
} else {
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
(adev->vce.gpu_addr >> 8));
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
(adev->vce.gpu_addr >> 40) & 0xff);
+ WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
}
- offset = AMDGPU_VCE_FIRMWARE_OFFSET;
size = VCE_V4_0_FW_SIZE;
- WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000);
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8));
@@ -1088,8 +1101,10 @@ static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
{
int i;
- for (i = 0; i < adev->vce.num_rings; i++)
+ for (i = 0; i < adev->vce.num_rings; i++) {
adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs;
+ adev->vce.ring[i].me = i;
+ }
DRM_INFO("VCE enabled in VM mode\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 29684c3ea4ef..eae90922fdbe 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -35,10 +35,19 @@
#include "mmhub/mmhub_9_1_offset.h"
#include "mmhub/mmhub_9_1_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h"
+
+#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab
+#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1
+#define mmUVD_REG_XX_MASK 0x05ac
+#define mmUVD_REG_XX_MASK_BASE_IDX 1
+
static int vcn_v1_0_stop(struct amdgpu_device *adev);
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
/**
* vcn_v1_0_early_init - set function pointers
@@ -55,6 +64,7 @@ static int vcn_v1_0_early_init(void *handle)
vcn_v1_0_set_dec_ring_funcs(adev);
vcn_v1_0_set_enc_ring_funcs(adev);
+ vcn_v1_0_set_jpeg_ring_funcs(adev);
vcn_v1_0_set_irq_funcs(adev);
return 0;
@@ -74,22 +84,37 @@ static int vcn_v1_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* VCN DEC TRAP */
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq);
if (r)
return r;
/* VCN ENC TRAP */
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + 119,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
&adev->vcn.irq);
if (r)
return r;
}
+ /* VCN JPEG TRAP */
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq);
+ if (r)
+ return r;
+
r = amdgpu_vcn_sw_init(adev);
if (r)
return r;
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ const struct common_firmware_header *hdr;
+ hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
+ adev->firmware.fw_size +=
+ ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+ DRM_INFO("PSP loading VCN firmware\n");
+ }
+
r = amdgpu_vcn_resume(adev);
if (r)
return r;
@@ -108,6 +133,12 @@ static int vcn_v1_0_sw_init(void *handle)
return r;
}
+ ring = &adev->vcn.ring_jpeg;
+ sprintf(ring->name, "vcn_jpeg");
+ r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+ if (r)
+ return r;
+
return r;
}
@@ -162,9 +193,18 @@ static int vcn_v1_0_hw_init(void *handle)
}
}
+ ring = &adev->vcn.ring_jpeg;
+ ring->ready = true;
+ r = amdgpu_ring_test_ring(ring);
+ if (r) {
+ ring->ready = false;
+ goto done;
+ }
+
done:
if (!r)
- DRM_INFO("VCN decode and encode initialized successfully.\n");
+ DRM_INFO("VCN decode and encode initialized successfully (under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");
return r;
}
@@ -232,38 +272,52 @@ static int vcn_v1_0_resume(void *handle)
}
/**
- * vcn_v1_0_mc_resume - memory controller programming
+ * vcn_v1_0_mc_resume_spg_mode - memory controller programming
*
* @adev: amdgpu_device pointer
*
* Let the VCN memory controller know it's offsets
*/
-static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
+static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev)
{
uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
-
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
lower_32_bits(adev->vcn.gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.gpu_addr));
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
- AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ offset = size;
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
+ /* cache window 1: stack */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + size));
+ lower_32_bits(adev->vcn.gpu_addr + offset));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + size));
+ upper_32_bits(adev->vcn.gpu_addr + offset));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
+ /* cache window 2: context */
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
- lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
+ lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
- upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
+ upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
- AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40));
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
@@ -271,6 +325,96 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
adev->gfx.config.gb_addr_config);
WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_JPEG_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+ WREG32_SOC15(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config);
+}
+
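+/*
+ * DPG-mode counterpart of vcn_v1_0_mc_resume_spg_mode: same cache window
+ * layout (fw, stack, context), but every register is programmed through
+ * the WREG32_SOC15_DPG_MODE interface.
+ */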
+static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0,
+ 0xFFFFFFFF, 0);
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0);
+ offset = size;
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0);
+ }
+
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0);
+
+ /* cache window 1: stack */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0,
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE,
+ 0xFFFFFFFF, 0);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+ lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+ upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE),
+ 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE,
+ 0xFFFFFFFF, 0);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG,
+ adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0);
}
/**
@@ -473,6 +617,60 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
}
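+/*
+ * Clock gating setup used in DPG mode; all writes go through
+ * WREG32_SOC15_DPG_MODE with the caller-supplied sram_sel.
+ */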
+static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel)
+{
+ uint32_t reg_data = 0;
+
+ /* disable JPEG CGC */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
+
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__SCPU_MODE_MASK);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel);
+}
+
static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
{
uint32_t data = 0;
@@ -568,7 +766,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
*
* Setup and start the VCN block
*/
-static int vcn_v1_0_start(struct amdgpu_device *adev)
+static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = &adev->vcn.ring_dec;
uint32_t rb_bufsz, tmp;
@@ -578,9 +776,11 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
/* disable byte swapping */
lmi_swap_cntl = 0;
- vcn_v1_0_mc_resume(adev);
-
vcn_1_0_disable_static_power_gating(adev);
+
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
+
/* disable clock gating */
vcn_v1_0_disable_clock_gating(adev);
@@ -588,32 +788,13 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* stall UMC and register bus before resetting VCPU */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- mdelay(1);
-
- /* put LMI, VCPU, RBC etc... into reset */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
- UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
- UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
- UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
- UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
- mdelay(5);
-
/* initialize VCN memory controller */
- WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
- (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
- UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
- UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
- UVD_LMI_CTRL__REQ_MODE_MASK |
- 0x00100000L);
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
#ifdef __BIG_ENDIAN
/* swap (8 in 32) RB and IB */
@@ -621,41 +802,61 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
#endif
WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
- WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL);
+ tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
+ tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL, tmp);
+
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
+
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
+
+ WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
- /* take all subblocks out of reset, except VCPU */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(5);
+ vcn_v1_0_mc_resume_spg_mode(adev);
+
+ WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK,
+ RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3);
/* enable VCPU clock */
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
- UVD_VCPU_CNTL__CLK_EN_MASK);
+ WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
+
+ /* boot up the VCPU */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
/* enable UMC */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- /* boot up the VCPU */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
- mdelay(10);
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET);
+ tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
+ tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, tmp);
for (i = 0; i < 10; ++i) {
uint32_t status;
for (j = 0; j < 100; ++j) {
status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
- if (status & 2)
+ if (status & UVD_STATUS__IDLE)
break;
mdelay(10);
}
r = 0;
- if (status & 2)
+ if (status & UVD_STATUS__IDLE)
break;
DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
@@ -675,19 +876,22 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
}
/* enable master interrupt */
WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
- (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
- ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+ UVD_MASTINT_EN__VCPU_EN_MASK, ~UVD_MASTINT_EN__VCPU_EN_MASK);
- /* clear the bit 4 of VCN_STATUS */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
- ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+ /* enable system interrupt for JRBC, TODO: move to set interrupt */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SYS_INT_EN),
+ UVD_SYS_INT_EN__UVD_JRBC_EN_MASK,
+ ~UVD_SYS_INT_EN__UVD_JRBC_EN_MASK);
+
+ /* clear the busy bit of UVD_STATUS */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) & ~UVD_STATUS__UVD_BUSY;
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp);
/* force RBC into idle state */
rb_bufsz = order_base_2(ring->ring_size);
tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
- tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
@@ -708,6 +912,8 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
/* Initialize the ring buffer's read and write pointers */
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0);
+
ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr));
@@ -729,9 +935,186 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
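+ /* program the JPEG ring and reset its read/write pointers */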
+ ring = &adev->vcn.ring_jpeg;
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+ UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+
+ /* initialize wptr */
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+
+ /* copy patch commands to the jpeg ring */
+ vcn_v1_0_jpeg_ring_set_patch_ring(ring,
+ (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+
return 0;
}
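+/*
+ * vcn_v1_0_start_dpg_mode - start VCN with dynamic power gating enabled
+ *
+ * Mirrors vcn_v1_0_start_spg_mode, but turns on UVD_PG_MODE/UVD_PG_EN and
+ * programs the block through the WREG32_SOC15_DPG_MODE interface.
+ */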
+static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+ uint32_t rb_bufsz, tmp;
+ uint32_t lmi_swap_cntl;
+
+ /* disable byte swapping */
+ lmi_swap_cntl = 0;
+
+ vcn_1_0_enable_static_power_gating(adev);
+
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp);
+
+ /* enable clock gating */
+ vcn_v1_0_clock_gating_dpg_mode(adev, 0);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK;
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0);
+
+ /* disable interrupt */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN,
+ 0, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+ /* initialize VCN memory controller */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL,
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ 0x00100000L, 0xFFFFFFFF, 0);
+
+#ifdef __BIG_ENDIAN
+ /* swap (8 in 32) RB and IB */
+ lmi_swap_cntl = 0xa;
+#endif
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_CNTL,
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXA0,
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXB0,
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0xFFFFFFFF, 0);
+
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUX,
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0);
+
+ vcn_v1_0_mc_resume_dpg_mode(adev);
+
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0);
+
+ /* boot up the VCPU */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0);
+
+ /* enable UMC */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL2,
+ 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT,
+ 0xFFFFFFFF, 0);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN,
+ UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+ vcn_v1_0_clock_gating_dpg_mode(adev, 1);
+ /* setup mmUVD_LMI_CTRL */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL,
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ 0x00100000L, 0xFFFFFFFF, 1);
+
+ tmp = adev->gfx.config.gb_addr_config;
+ /* setup VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1);
+
+ /* enable System Interrupt for JRBC */
+ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SYS_INT_EN,
+ UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, 0xFFFFFFFF, 1);
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
+ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+ /* initialize wptr */
+ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+
+ /* copy patch commands to the jpeg ring */
+ vcn_v1_0_jpeg_ring_set_patch_ring(ring,
+ (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission));
+
+ return 0;
+}
+
+static int vcn_v1_0_start(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ r = vcn_v1_0_start_dpg_mode(adev);
+ else
+ r = vcn_v1_0_start_spg_mode(adev);
+ return r;
+}
+
/**
* vcn_v1_0_stop - stop VCN block
*
@@ -739,41 +1122,90 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)
*
* stop the VCN block
*/
-static int vcn_v1_0_stop(struct amdgpu_device *adev)
+static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
{
- /* force RBC into idle state */
- WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
+ int ret_code, tmp;
- /* Stall UMC and register bus before resetting VCPU */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
- UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
- mdelay(1);
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, ret_code);
+
+ tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__READ_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_MASK |
+ UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code);
/* put VCPU into reset */
- WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
- UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
- mdelay(5);
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+ tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
+ UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code);
/* disable VCPU clock */
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0,
+ ~UVD_VCPU_CNTL__CLK_EN_MASK);
- /* Unstall UMC and register bus */
- WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
- ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+ /* reset LMI UMC/LMI */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+ UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
+ ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
- WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
+ WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
vcn_v1_0_enable_clock_gating(adev);
vcn_1_0_enable_static_power_gating(adev);
return 0;
}
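+/*
+ * vcn_v1_0_stop_dpg_mode - stop VCN when running in dynamic power gating mode
+ *
+ * Waits for the power status to report tiles off and for the read pointer
+ * to catch up with the write pointer, then clears the PG mode bit.
+ */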
+static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
+{
+ int ret_code = 0;
+
+ /* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ int tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ /* wait for read ptr to be equal to write ptr */
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
+
+static int vcn_v1_0_stop(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ r = vcn_v1_0_stop_dpg_mode(adev);
+ else
+ r = vcn_v1_0_stop_spg_mode(adev);
+
+ return r;
+}
+
static bool vcn_v1_0_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2);
+ return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE);
}
static int vcn_v1_0_wait_for_idle(void *handle)
@@ -781,7 +1213,8 @@ static int vcn_v1_0_wait_for_idle(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret = 0;
- SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret);
+ SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE,
+ UVD_STATUS__IDLE, ret);
return ret;
}
@@ -843,6 +1276,10 @@ static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
}
@@ -1126,6 +1563,387 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, val);
}
+
+/**
+ * vcn_v1_0_jpeg_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x80010000);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_insert_end - insert an end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write an end command to the ring.
+ */
+static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x68e04);
+
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x00010000);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_emit_fence - emit a fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @addr: GPU address to write the fence value to
+ * @seq: sequence number to emit
+ * @flags: fence flags
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+ unsigned flags)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x8);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, seq);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0xffffffff);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x3fbc);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x1);
+
+ /* emit trap */
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
+ amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer.
+ */
+static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ unsigned vmid, bool ctx_switch)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, ib->length_dw);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr));
+
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2));
+ amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3));
+ amdgpu_ring_write(ring, 0x2);
+}
+
+static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, 0x01400200);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE3));
+ }
+ amdgpu_ring_write(ring, mask);
+}
+
+static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+ uint32_t data0, data1, mask;
+
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
+ data1 = lower_32_bits(pd_addr);
+ mask = 0xffffffff;
+ vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg_offset = (reg << 2);
+
+ amdgpu_ring_write(ring,
+ PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0));
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring,
+ PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0));
+ } else {
+ amdgpu_ring_write(ring, reg_offset);
+ amdgpu_ring_write(ring,
+ PACKETJ(0, 0, 0, PACKETJ_TYPE0));
+ }
+ amdgpu_ring_write(ring, val);
+}
+
+static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ int i;
+
+ WARN_ON(ring->wptr % 2 || count % 2);
+
+ for (i = 0; i < count / 2; i++) {
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
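+/*
+ * Helper for vcn_v1_0_jpeg_ring_set_patch_ring: write a register-write
+ * packet directly into the ring buffer at *ptr, choosing the packet form
+ * based on the register offset range.
+ */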
+static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+ ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[(*ptr)++] = 0;
+ ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0);
+ } else {
+ ring->ring[(*ptr)++] = reg_offset;
+ ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0);
+ }
+ ring->ring[(*ptr)++] = val;
+}
+
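+/*
+ * Pre-fill the JPEG ring with a fixed command sequence starting at offset
+ * ptr; the start paths copy these patch commands in right after the ring's
+ * write pointer is initialized.
+ */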
+static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ uint32_t reg, reg_offset, val, mask, i;
+
+ // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW);
+ reg_offset = (reg << 2);
+ val = lower_32_bits(ring->gpu_addr);
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH);
+ reg_offset = (reg << 2);
+ val = upper_32_bits(ring->gpu_addr);
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 3rd to 5th: issue MEM_READ commands
+ for (i = 0; i <= 2; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2);
+ ring->ring[ptr++] = 0;
+ }
+
+ // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x13;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 7th: program mmUVD_JRBC_RB_REF_DATA
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x1;
+ mask = 0x1;
+
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = 0x01400200;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0);
+ ring->ring[ptr++] = val;
+ ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0);
+ if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) ||
+ ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) {
+ ring->ring[ptr++] = 0;
+ ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3);
+ } else {
+ ring->ring[ptr++] = reg_offset;
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3);
+ }
+ ring->ring[ptr++] = mask;
+
+ // 9th to 21st: insert no-op
+ for (i = 0; i <= 12; i++) {
+ ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
+ ring->ring[ptr++] = 0;
+ }
+
+ // 22nd: reset mmUVD_JRBC_RB_RPTR
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR);
+ reg_offset = (reg << 2);
+ val = 0;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+
+ // 23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch
+ reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL);
+ reg_offset = (reg << 2);
+ val = 0x12;
+ vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val);
+}
+
static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
unsigned type,
@@ -1150,6 +1968,9 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
case 120:
amdgpu_fence_process(&adev->vcn.ring_enc[1]);
break;
+ case 126:
+ amdgpu_fence_process(&adev->vcn.ring_jpeg);
+ break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -1182,12 +2003,20 @@ static int vcn_v1_0_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
+ int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (state == adev->vcn.cur_state)
+ return 0;
+
if (state == AMD_PG_STATE_GATE)
- return vcn_v1_0_stop(adev);
+ ret = vcn_v1_0_stop(adev);
else
- return vcn_v1_0_start(adev);
+ ret = vcn_v1_0_start(adev);
+
+ if (!ret)
+ adev->vcn.cur_state = state;
+ return ret;
}
static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
@@ -1273,6 +2102,40 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
+static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
+ .type = AMDGPU_RING_TYPE_VCN_JPEG,
+ .align_mask = 0xf,
+ .nop = PACKET0(0x81ff, 0),
+ .support_64bit_ptrs = false,
+ .vmhub = AMDGPU_MMHUB,
+ .extra_dw = 64,
+ .get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
+ .get_wptr = vcn_v1_0_jpeg_ring_get_wptr,
+ .set_wptr = vcn_v1_0_jpeg_ring_set_wptr,
+ .emit_frame_size =
+ 6 + 6 + /* hdp invalidate / flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */
+ 26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */
+ 6,
+ .emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */
+ .emit_ib = vcn_v1_0_jpeg_ring_emit_ib,
+ .emit_fence = vcn_v1_0_jpeg_ring_emit_fence,
+ .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush,
+ .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
+ .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
+ .insert_nop = vcn_v1_0_jpeg_ring_nop,
+ .insert_start = vcn_v1_0_jpeg_ring_insert_start,
+ .insert_end = vcn_v1_0_jpeg_ring_insert_end,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .begin_use = amdgpu_vcn_ring_begin_use,
+ .end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg,
+ .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
@@ -1289,6 +2152,12 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
DRM_INFO("VCN encode is enabled in VM mode\n");
}
+static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev)
+{
+ adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs;
+ DRM_INFO("VCN jpeg decode is enabled in VM mode\n");
+}
+
static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
.set = vcn_v1_0_set_interrupt_state,
.process = vcn_v1_0_process_interrupt,
@@ -1296,7 +2165,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 1;
+ adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2;
adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 5ae5ed2e62d6..a99f71797aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -265,35 +265,36 @@ static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
return true;
}
- addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
- key = AMDGPU_VM_FAULT(pasid, addr);
- r = amdgpu_ih_add_fault(adev, key);
-
- /* Hash table is full or the fault is already being processed,
- * ignore further page faults
- */
- if (r != 0)
- goto ignore_iv;
-
/* Track retry faults in per-VM fault FIFO. */
spin_lock(&adev->vm_manager.pasid_lock);
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+ addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
+ key = AMDGPU_VM_FAULT(pasid, addr);
if (!vm) {
/* VM not found, process it normally */
spin_unlock(&adev->vm_manager.pasid_lock);
- amdgpu_ih_clear_fault(adev, key);
return true;
+ } else {
+ r = amdgpu_vm_add_fault(vm->fault_hash, key);
+
+ /* Hash table is full or the fault is already being processed,
+ * ignore further page faults
+ */
+ if (r != 0) {
+ spin_unlock(&adev->vm_manager.pasid_lock);
+ goto ignore_iv;
+ }
}
/* No locking required with single writer and single reader */
r = kfifo_put(&vm->faults, key);
if (!r) {
/* FIFO is full. Ignore it until there is space */
+ amdgpu_vm_clear_fault(vm->fault_hash, key);
spin_unlock(&adev->vm_manager.pasid_lock);
- amdgpu_ih_clear_fault(adev, key);
goto ignore_iv;
}
- spin_unlock(&adev->vm_manager.pasid_lock);
+ spin_unlock(&adev->vm_manager.pasid_lock);
/* It's the first fault for this address, process it normally */
return true;
@@ -379,21 +380,13 @@ static int vega10_ih_sw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_ih_ring_init(adev, 256 * 1024, true);
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true);
if (r)
return r;
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1;
- adev->irq.ih.faults = kmalloc(sizeof(*adev->irq.ih.faults), GFP_KERNEL);
- if (!adev->irq.ih.faults)
- return -ENOMEM;
- INIT_CHASH_TABLE(adev->irq.ih.faults->hash,
- AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
- spin_lock_init(&adev->irq.ih.faults->lock);
- adev->irq.ih.faults->count = 0;
-
r = amdgpu_irq_init(adev);
return r;
@@ -404,10 +397,7 @@ static int vega10_ih_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
amdgpu_irq_fini(adev);
- amdgpu_ih_ring_fini(adev);
-
- kfree(adev->irq.ih.faults);
- adev->irq.ih.faults = NULL;
+ amdgpu_ih_ring_fini(adev, &adev->irq.ih);
return 0;
}
@@ -504,8 +494,7 @@ static const struct amdgpu_ih_funcs vega10_ih_funcs = {
static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev)
{
- if (adev->irq.ih_funcs == NULL)
- adev->irq.ih_funcs = &vega10_ih_funcs;
+ adev->irq.ih_funcs = &vega10_ih_funcs;
}
const struct amdgpu_ip_block_version vega10_ih_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index 45aafca7f315..c5c9b2bc190d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -51,6 +51,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i]));
adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i]));
adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+ adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 52778de93ab0..2d4473557b0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+ adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
@@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+ adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+ adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 4ac1288ab7df..07880d35e9de 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -112,8 +112,8 @@ static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg)
u32 r;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
- WREG32(mmSMC_IND_INDEX_11, (reg));
- r = RREG32(mmSMC_IND_DATA_11);
+ WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
+ r = RREG32_NO_KIQ(mmSMC_IND_DATA_11);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return r;
}
@@ -1363,11 +1363,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
if (adev->cg_flags & (AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_MC_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) {
- pp_support_state = AMD_CG_SUPPORT_MC_LS;
+ pp_support_state = PP_STATE_SUPPORT_LS;
pp_state = PP_STATE_LS;
}
if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG) {
- pp_support_state |= AMD_CG_SUPPORT_MC_MGCG;
+ pp_support_state |= PP_STATE_SUPPORT_CG;
pp_state |= PP_STATE_CG;
}
if (state == AMD_CG_STATE_UNGATE)
@@ -1382,11 +1382,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
if (adev->cg_flags & (AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_SDMA_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS) {
- pp_support_state = AMD_CG_SUPPORT_SDMA_LS;
+ pp_support_state = PP_STATE_SUPPORT_LS;
pp_state = PP_STATE_LS;
}
if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG) {
- pp_support_state |= AMD_CG_SUPPORT_SDMA_MGCG;
+ pp_support_state |= PP_STATE_SUPPORT_CG;
pp_state |= PP_STATE_CG;
}
if (state == AMD_CG_STATE_UNGATE)
@@ -1401,11 +1401,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,
if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_MGCG)) {
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
- pp_support_state = AMD_CG_SUPPORT_HDP_LS;
+ pp_support_state = PP_STATE_SUPPORT_LS;
pp_state = PP_STATE_LS;
}
if (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG) {
- pp_support_state |= AMD_CG_SUPPORT_HDP_MGCG;
+ pp_support_state |= PP_STATE_SUPPORT_CG;
pp_state |= PP_STATE_CG;
}
if (state == AMD_CG_STATE_UNGATE)
@@ -1596,16 +1596,18 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block);
amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block);
break;
case CHIP_FIJI:
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -1615,8 +1617,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v10_1_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
if (!amdgpu_sriov_vf(adev)) {
amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
@@ -1626,6 +1626,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -1635,8 +1637,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
if (!amdgpu_sriov_vf(adev)) {
amdgpu_device_ip_block_add(adev, &uvd_v5_0_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
@@ -1649,6 +1649,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -1658,8 +1660,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
break;
@@ -1667,6 +1667,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -1676,8 +1678,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)
@@ -1688,6 +1688,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
+ amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
@@ -1697,8 +1699,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
#endif
else
amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block);
- amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block);
- amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block);
amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block);
#if defined(CONFIG_DRM_AMD_ACP)