From b1bb8c0118b3b9d44a6a31ea5242bdd9ed040a9b Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Fri, 7 Apr 2017 07:53:31 -0400 Subject: drm/amd/amdgpu: Introduce new read/write macros for SOC15 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6a8129949333..c4f6211640da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1704,9 +1704,6 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); #define WREG32_FIELD_OFFSET(reg, offset, field, val) \ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) -#define WREG32_FIELD15(ip, idx, reg, field, val) \ - WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) - /* * BIOS helpers. */ -- cgit From 55ed8caf14fdfecf300d35cfe2e04b670b0191c1 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 21 Apr 2017 16:40:00 +0800 Subject: drm/amdgpu: increase gtt size to 3GB by default v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: address Alex's comment, add AMDGPU_DEFAULT_GTT_SIZE_MB. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 5 files changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c4f6211640da..a4fc54c70f30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -110,6 +110,7 @@ extern int amdgpu_pos_buf_per_se; extern int amdgpu_cntl_sb_buf_per_se; extern int amdgpu_param_buf_per_se; +#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 31c50b2dbbc1..a572979f186c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -346,7 +346,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. */ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 2217d4d0f895..a9083a16a250 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -395,7 +395,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. */ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index e4f5df32f31e..4ac99784160a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -557,7 +557,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. */ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index edf43769ae70..e5d4dfeae3fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -486,7 +486,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) * size equal to the 1024 or vram, whichever is larger. */ if (amdgpu_gart_size == -1) - adev->mc.gtt_size = max((1024ULL << 20), adev->mc.mc_vram_size); + adev->mc.gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), + adev->mc.mc_vram_size); else adev->mc.gtt_size = (uint64_t)amdgpu_gart_size << 20; -- cgit From 408bfe7c3c5d036947b509356f494dc6b46025ff Mon Sep 17 00:00:00 2001 From: Junwei Zhang Date: Thu, 27 Apr 2017 11:12:07 +0800 Subject: drm/amdgpu: export more gpu info for gfx9 v2: 64-bit aligned for gpu info v3: squash in wave_front_fix Signed-off-by: Ken Wang Signed-off-by: Junwei Zhang Reviewed-by: Alex Deucher Reviewed-by: Qiang Yu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++ include/uapi/drm/amdgpu_drm.h | 19 +++++++++++++++++++ 4 files changed, 37 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a4fc54c70f30..c9f935710d40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -967,6 +967,9 @@ struct amdgpu_gfx_config { unsigned mc_arb_ramcfg; unsigned gb_addr_config; unsigned num_rbs; + unsigned gs_vgt_table_depth; + unsigned gs_prim_buffer_depth; + unsigned max_gs_waves_per_vgt; uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; @@ -981,6 +984,7 @@ struct amdgpu_gfx_config { struct amdgpu_cu_info { uint32_t number; /* total active CU number */ uint32_t ao_cu_mask; + uint32_t wave_front_size; uint32_t bitmap[4][4]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 832be632478f..21f616cdb279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -546,10 +546,21 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (amdgpu_ngg) { dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; + dev_info.prim_buf_size = adev->gfx.ngg.buf[PRIM].size; dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; + dev_info.pos_buf_size = adev->gfx.ngg.buf[POS].size; dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; + dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[CNTL].size; dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; + dev_info.param_buf_size = adev->gfx.ngg.buf[PARAM].size; } + dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; + dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; + dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; + dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; + dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; + dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_waves_per_vgt; return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9a9cb90e2f5f..210d21c085f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -785,6 +785,9 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + adev->gfx.config.gs_vgt_table_depth = 32; + adev->gfx.config.gs_prim_buffer_depth = 1792; + adev->gfx.config.max_gs_waves_per_vgt = 32; gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; break; default: diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 95260e5043af..6c249e5cfb09 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -767,6 +767,25 @@ struct drm_amdgpu_info_device { __u64 cntl_sb_buf_gpu_addr; /* NGG Parameter Cache */ __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + __u32 _pad1; }; struct drm_amdgpu_info_hw_ip { -- cgit From f47b77b4e4baeb7922b5652ae7040d4d5de684a7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 May 2017 15:49:36 -0400 Subject: drm/amdgpu/gfx: drop max_gs_waves_per_vgt We already have this info: max_gs_threads. Drop the duplicate. Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c9f935710d40..8033001b913a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -969,7 +969,6 @@ struct amdgpu_gfx_config { unsigned num_rbs; unsigned gs_vgt_table_depth; unsigned gs_prim_buffer_depth; - unsigned max_gs_waves_per_vgt; uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4f2b6679f72f..1a7830a291d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -560,7 +560,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; dev_info.gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; - dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_waves_per_vgt; + dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; return copy_to_user(out, &dev_info, min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5f2ae4cb510f..484ead2a20a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -786,7 +786,6 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; adev->gfx.config.gs_vgt_table_depth = 32; adev->gfx.config.gs_prim_buffer_depth = 1792; - adev->gfx.config.max_gs_waves_per_vgt = 32; gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; break; default: -- cgit From af8baf1518d8b3d086ac6d11d8f6acd57e9cab99 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 3 May 2017 23:49:18 -0700 Subject: drm/amdgpu: Use less generic enum definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit alpha:allmodconfig fails to build as follows. drivers/gpu/drm/amd/amdgpu/amdgpu.h:1006:2: error: expected identifier before '(' token drivers/gpu/drm/amd/amdgpu/amdgpu.h:1011:28: error: 'NGG_BUF_MAX' undeclared here The problem is not really the enum definition of NGG_BUF_MAX but PARAM, which happens to be defined differently for alpha and a couple of other architectures. Use less generic defines for NGG enums to solve the problem. Fixes: bce23e00f3369 ("drm/amdgpu: add NGG parameters") Cc: Christian König Cc: Alex Deucher Signed-off-by: Guenter Roeck Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 28 ++++++++++++++-------------- 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8033001b913a..86923c57908b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1004,10 +1004,10 @@ struct amdgpu_ngg_buf { }; enum { - PRIM = 0, - POS, - CNTL, - PARAM, + NGG_PRIM = 0, + NGG_POS, + NGG_CNTL, + NGG_PARAM, NGG_BUF_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1a7830a291d6..96c341670782 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -545,14 +545,14 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file adev->gfx.config.double_offchip_lds_buf; if (amdgpu_ngg) { - dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[PRIM].gpu_addr; - dev_info.prim_buf_size = adev->gfx.ngg.buf[PRIM].size; - dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[POS].gpu_addr; - dev_info.pos_buf_size = adev->gfx.ngg.buf[POS].size; - dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[CNTL].gpu_addr; - dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[CNTL].size; - dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[PARAM].gpu_addr; - dev_info.param_buf_size = adev->gfx.ngg.buf[PARAM].size; + dev_info.prim_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PRIM].gpu_addr; + dev_info.prim_buf_size = adev->gfx.ngg.buf[NGG_PRIM].size; + dev_info.pos_buf_gpu_addr = adev->gfx.ngg.buf[NGG_POS].gpu_addr; + dev_info.pos_buf_size = adev->gfx.ngg.buf[NGG_POS].size; + dev_info.cntl_sb_buf_gpu_addr = adev->gfx.ngg.buf[NGG_CNTL].gpu_addr; + dev_info.cntl_sb_buf_size = adev->gfx.ngg.buf[NGG_CNTL].size; + dev_info.param_buf_gpu_addr = adev->gfx.ngg.buf[NGG_PARAM].gpu_addr; + dev_info.param_buf_size = adev->gfx.ngg.buf[NGG_PARAM].size; } dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 741b56f996c4..0c16b7563b73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -890,7 +890,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size; /* Primitive Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM], + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], amdgpu_prim_buf_per_se, 64 * 1024); if (r) { @@ -899,7 +899,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) } /* Position Buffer */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS], + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS], amdgpu_pos_buf_per_se, 256 * 1024); if (r) { @@ -908,7 +908,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) } /* Control Sideband */ - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL], + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL], amdgpu_cntl_sb_buf_per_se, 256); if (r) { @@ -920,7 +920,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) if (amdgpu_param_buf_per_se <= 0) goto out; - r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM], + r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM], amdgpu_param_buf_per_se, 512 * 1024); if (r) { @@ -949,45 +949,45 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) /* Program buffer size */ data = 0; - size = adev->gfx.ngg.buf[PRIM].size / 256; + size = adev->gfx.ngg.buf[NGG_PRIM].size / 256; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size); - size = adev->gfx.ngg.buf[POS].size / 256; + size = adev->gfx.ngg.buf[NGG_POS].size / 256; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size); WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data); data = 0; - size = adev->gfx.ngg.buf[CNTL].size / 256; + size = adev->gfx.ngg.buf[NGG_CNTL].size / 256; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size); - size = adev->gfx.ngg.buf[PARAM].size / 1024; + size = adev->gfx.ngg.buf[NGG_PARAM].size / 1024; data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size); WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data); /* Program buffer base address */ - base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); - base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr); + base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); - base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); - base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr); + base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); - base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); - base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr); + base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); -- cgit From 30514decb27d45b98599612cb5d3e6a20ba733a5 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 9 May 2017 13:39:40 +0800 Subject: drm/amdgpu: fix dependency issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The problem is that executing the jobs in the right order doesn't give you the right result because consecutive jobs executed on the same engine are pipelined. In other words job B does it buffer read before job A has written it's result. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 ++ 6 files changed, 27 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 86923c57908b..833c3c16501a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1129,6 +1129,7 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; + bool need_pipeline_sync; unsigned vm_id; uint64_t vm_pd_addr; uint32_t gds_base, gds_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2d11ac8d1aa9..6e4ae0d983c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } + if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) + amdgpu_ring_emit_pipeline_sync(ring); if (vm) { r = amdgpu_vm_flush(ring, job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index c3cfeb335d99..7570f2439a11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; + (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); @@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) fence = amdgpu_sync_get_fence(&job->sync); } + if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) + job->need_pipeline_sync = true; + return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c42a9979d056..07ff3b1514f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -614,7 +614,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_pipeline_sync) + if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && vm_flush_needed) { diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 28b92c8d9985..fea96a765cf1 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -236,6 +236,23 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +bool amd_sched_dependency_optimized(struct dma_fence* fence, + struct amd_sched_entity *entity) +{ + struct amd_gpu_scheduler *sched = entity->sched; + struct amd_sched_fence *s_fence; + + if (!fence || dma_fence_is_signaled(fence)) + return false; + if (fence->context == entity->fence_context) + return true; + s_fence = to_amd_sched_fence(fence); + if (s_fence && s_fence->sched == sched) + return true; + + return false; +} + static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) { struct amd_gpu_scheduler *sched = entity->sched; diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 0255c7f8a6d8..924d4a5899e1 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -158,4 +158,6 @@ int amd_sched_job_init(struct amd_sched_job *job, void *owner); void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched); void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); +bool amd_sched_dependency_optimized(struct dma_fence* fence, + struct amd_sched_entity *entity); #endif -- cgit