From a05502e5cfa9abe17a16592be82c2f5692c91f35 Mon Sep 17 00:00:00 2001 From: Horace Chen Date: Fri, 29 Sep 2017 14:41:57 +0800 Subject: drm/amdgpu: Reserve shared memory on VRAM for SR-IOV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SR-IOV need to reserve a piece of shared VRAM at the exact place to exchange data betweem PF and VF. The start address and size of the shared mem are passed to guest through VBIOS structure VRAM_UsageByFirmware. VRAM_UsageByFirmware is a general feature in VBIOS, it indicates that VBIOS need to reserve a piece of memory on the VRAM. Because the mem address is specified. Reserve it early in amdgpu_ttm_init to make sure that it can monoplize the space. Signed-off-by: Horace Chen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 76 ++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1949d8aedf49..7b3e3b5461c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -657,6 +657,81 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) mc->gart_size >> 20, mc->gart_start, mc->gart_end); } +/* + * Firmware Reservation functions + */ +/** + * amdgpu_fw_reserve_vram_fini - free fw reserved vram + * + * @adev: amdgpu_device pointer + * + * free fw reserved vram if it has been reserved. + */ +void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, + NULL, &adev->fw_vram_usage.va); +} + +/** + * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from fw. + */ +int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev) +{ + int r = 0; + u64 gpu_addr; + u64 vram_size = adev->mc.visible_vram_size; + + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + + if (adev->fw_vram_usage.size > 0 && + adev->fw_vram_usage.size <= vram_size) { + + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, + PAGE_SIZE, true, 0, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, + &adev->fw_vram_usage.reserved_bo); + if (r) + goto error_create; + + r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); + if (r) + goto error_reserve; + r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, + AMDGPU_GEM_DOMAIN_VRAM, + adev->fw_vram_usage.start_offset, + (adev->fw_vram_usage.start_offset + + adev->fw_vram_usage.size), &gpu_addr); + if (r) + goto error_pin; + r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); + if (r) + goto error_kmap; + + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + } + return r; + +error_kmap: + amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); +error_pin: + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +error_reserve: + amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); +error_create: + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + return r; +} + + /* * GPU helpers function. */ @@ -2300,6 +2375,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); + amdgpu_fw_reserve_vram_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); -- cgit From b8866c26ec072f1924f5cf601ebea33ca9823326 Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Fri, 28 Apr 2017 20:05:51 -0400 Subject: drm/amdgpu: implement ring set_priority for gfx_v8 compute v9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over other queues on the same pipe. Multiple queues on a pipe are timesliced so this gives us full precedence over other queues. Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the wave as follows: 0x2: CS_H 0x1: CS_M 0x0: CS_L The SPI block will then dispatch work according to the policy set by SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than gfx. In order to prevent getting stuck in loops of resources bouncing between GFX and high priority compute and introducing further latency, we statically reserve a portion of the pipe. v2: fix srbm_select to ring->queue and use ring->funcs->type v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_* v4: switch int to enum amd_sched_priority v5: corresponding changes for srbm_lock v6: change CU reservation to PIPE_PERCENT allocation v7: use kiq instead of MMIO v8: back to MMIO, and make the implementation sleep safe. v9: corresponding changes for splitting HIGH into _HW/_SW Acked-by: Christian König Signed-off-by: Andres Rodriguez Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 99 ++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5c4bed7778d9..715ce4863bc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1035,6 +1035,10 @@ struct amdgpu_gfx { bool in_suspend; /* NGG */ struct amdgpu_ngg ngg; + + /* pipe reservation */ + struct mutex pipe_reserve_mutex; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7b3e3b5461c3..412ad99d8871 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2094,6 +2094,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = &amdgpu_invalid_rreg; adev->smc_wreg = &amdgpu_invalid_wreg; @@ -2122,6 +2123,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); + mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 147e92b3a959..51896b7353b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6394,6 +6394,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } +static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(&adev->gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { + /* Clear all reservations - everyone reacquires all resources */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], + true); + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], + true); + } else { + /* Lower all pipes without a current reservation */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + iring = &adev->gfx.gfx_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + iring = &adev->gfx.compute_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + } + + mutex_unlock(&adev->gfx.pipe_reserve_mutex); +} + +static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + uint32_t pipe_priority = acquire ? 0x2 : 0x0; + uint32_t queue_priority = acquire ? 0xf : 0x0; + + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); + + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} +static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + struct amdgpu_device *adev = ring->adev; + bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW; + + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return; + + gfx_v8_0_hqd_set_priority(adev, ring, acquire); + gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); +} + static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) @@ -6839,6 +6937,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .set_priority = gfx_v8_0_ring_set_priority_compute, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { -- cgit From 202f5d6e1a5962e3acb21c50cd9b4e3ab5990dab Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 9 Oct 2017 13:50:31 +0800 Subject: drm/amdgpu: refine code delete duplicated error handling in function amdgpu_ucode_init_bo, when failed, it will set load_type to AMDGPU_FW_LOAD_DIRECT. Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 412ad99d8871..1f793eb301ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1711,8 +1711,8 @@ static int amdgpu_init(struct amdgpu_device *adev) } mutex_lock(&adev->firmware.mutex); - if (amdgpu_ucode_init_bo(adev)) - adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) + amdgpu_ucode_init_bo(adev); mutex_unlock(&adev->firmware.mutex); for (i = 0; i < adev->num_ip_blocks; i++) { -- cgit From 2dc8f81e4f822cfe8f6475da968ab2dd5881b8d8 Mon Sep 17 00:00:00 2001 From: Horace Chen Date: Mon, 9 Oct 2017 16:17:16 +0800 Subject: drm/amdgpu: SR-IOV data exchange between PF&VF SR-IOV need to exchange some data between PF&VF through shared VRAM PF will copy some necessary firmware and information to the shared VRAM. It also requires some information from VF. PF will send a key through mailbox2 to help guest calculate checksum so that it can verify whether the data is correct. So check the data on the specified offset of the shared VRAM, if the checksum is right, read values from it and write some VF information next to the data from PF. Signed-off-by: Horace Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 75 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 178 +++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 6 + 4 files changed, 262 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1f793eb301ef..9cdaba4af216 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2300,6 +2300,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("ib ring test failed (%d).\n", r); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_init_data_exchange(adev); + amdgpu_fbdev_init(adev); r = amdgpu_pm_sysfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ab05121b9272..ed7be2eb24b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -274,3 +274,78 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) (void *)&adev->virt.mm_table.cpu_addr); adev->virt.mm_table.gpu_addr = 0; } + + +int amdgpu_virt_fw_reserve_get_checksum(void *obj, + unsigned long obj_size, + unsigned int key, + unsigned int chksum) +{ + unsigned int ret = key; + unsigned long i = 0; + unsigned char *pos; + + pos = (char *)obj; + /* calculate checksum */ + for (i = 0; i < obj_size; ++i) + ret += *(pos + i); + /* minus the chksum itself */ + pos = (char *)&chksum; + for (i = 0; i < sizeof(chksum); ++i) + ret -= *(pos + i); + return ret; +} + +void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) +{ + uint32_t pf2vf_ver = 0; + uint32_t pf2vf_size = 0; + uint32_t checksum = 0; + uint32_t checkval; + char *str; + + adev->virt.fw_reserve.p_pf2vf = NULL; + adev->virt.fw_reserve.p_vf2pf = NULL; + + if (adev->fw_vram_usage.va != NULL) { + adev->virt.fw_reserve.p_pf2vf = + (struct amdgim_pf2vf_info_header *)( + adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET); + pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version; + AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); + AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); + + /* pf2vf message must be in 4K */ + if (pf2vf_size > 0 && pf2vf_size < 4096) { + checkval = amdgpu_virt_fw_reserve_get_checksum( + adev->virt.fw_reserve.p_pf2vf, pf2vf_size, + adev->virt.fw_reserve.checksum_key, checksum); + if (checkval == checksum) { + adev->virt.fw_reserve.p_vf2pf = + ((void *)adev->virt.fw_reserve.p_pf2vf + + pf2vf_size); + memset((void *)adev->virt.fw_reserve.p_vf2pf, 0, + sizeof(amdgim_vf2pf_info)); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version, + AMDGPU_FW_VRAM_VF2PF_VER); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size, + sizeof(amdgim_vf2pf_info)); + AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version, + &str); + if (THIS_MODULE->version != NULL) + strcpy(str, THIS_MODULE->version); + else + strcpy(str, "N/A"); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert, + 0); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum, + amdgpu_virt_fw_reserve_get_checksum( + adev->virt.fw_reserve.p_vf2pf, + pf2vf_size, + adev->virt.fw_reserve.checksum_key, 0)); + } + } + } +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index e5fd0ff6b29d..b89d37fc406f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -58,6 +58,179 @@ struct amdgpu_virt_ops { void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); }; +/* + * Firmware Reserve Frame buffer + */ +struct amdgpu_virt_fw_reserve { + struct amdgim_pf2vf_info_header *p_pf2vf; + struct amdgim_vf2pf_info_header *p_vf2pf; + unsigned int checksum_key; +}; +/* + * Defination between PF and VF + * Structures forcibly aligned to 4 to keep the same style as PF. + */ +#define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024) + +#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \ + (total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2)) + +enum AMDGIM_FEATURE_FLAG { + /* GIM supports feature of Error log collecting */ + AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1, + /* GIM supports feature of loading uCodes */ + AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, +}; + +struct amdgim_pf2vf_info_header { + /* the total structure size in byte. */ + uint32_t size; + /* version of this structure, written by the GIM */ + uint32_t version; +} __aligned(4); +struct amdgim_pf2vf_info_v1 { + /* header contains size and version */ + struct amdgim_pf2vf_info_header header; + /* max_width * max_height */ + unsigned int uvd_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + unsigned int uvd_enc_max_bandwidth; + /* max_width * max_height */ + unsigned int vce_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + unsigned int vce_enc_max_bandwidth; + /* MEC FW position in kb from the start of visible frame buffer */ + unsigned int mecfw_kboffset; + /* The features flags of the GIM driver supports. */ + unsigned int feature_flags; + /* use private key from mailbox 2 to create chueksum */ + unsigned int checksum; +} __aligned(4); + +struct amdgim_pf2vf_info_v2 { + /* header contains size and version */ + struct amdgim_pf2vf_info_header header; + /* use private key from mailbox 2 to create chueksum */ + uint32_t checksum; + /* The features flags of the GIM driver supports. */ + uint32_t feature_flags; + /* max_width * max_height */ + uint32_t uvd_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + uint32_t uvd_enc_max_bandwidth; + /* max_width * max_height */ + uint32_t vce_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + uint32_t vce_enc_max_bandwidth; + /* MEC FW position in kb from the start of VF visible frame buffer */ + uint64_t mecfw_kboffset; + /* MEC FW size in KB */ + uint32_t mecfw_ksize; + /* UVD FW position in kb from the start of VF visible frame buffer */ + uint64_t uvdfw_kboffset; + /* UVD FW size in KB */ + uint32_t uvdfw_ksize; + /* VCE FW position in kb from the start of VF visible frame buffer */ + uint64_t vcefw_kboffset; + /* VCE FW size in KB */ + uint32_t vcefw_ksize; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)]; +} __aligned(4); + + +struct amdgim_vf2pf_info_header { + /* the total structure size in byte. */ + uint32_t size; + /*version of this structure, written by the guest */ + uint32_t version; +} __aligned(4); + +struct amdgim_vf2pf_info_v1 { + /* header contains size and version */ + struct amdgim_vf2pf_info_header header; + /* driver version */ + char driver_version[64]; + /* driver certification, 1=WHQL, 0=None */ + unsigned int driver_cert; + /* guest OS type and version: need a define */ + unsigned int os_info; + /* in the unit of 1M */ + unsigned int fb_usage; + /* guest gfx engine usage percentage */ + unsigned int gfx_usage; + /* guest gfx engine health percentage */ + unsigned int gfx_health; + /* guest compute engine usage percentage */ + unsigned int compute_usage; + /* guest compute engine health percentage */ + unsigned int compute_health; + /* guest vce engine usage percentage. 0xffff means N/A. */ + unsigned int vce_enc_usage; + /* guest vce engine health percentage. 0xffff means N/A. */ + unsigned int vce_enc_health; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + unsigned int uvd_enc_usage; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + unsigned int uvd_enc_health; + unsigned int checksum; +} __aligned(4); + +struct amdgim_vf2pf_info_v2 { + /* header contains size and version */ + struct amdgim_vf2pf_info_header header; + uint32_t checksum; + /* driver version */ + uint8_t driver_version[64]; + /* driver certification, 1=WHQL, 0=None */ + uint32_t driver_cert; + /* guest OS type and version: need a define */ + uint32_t os_info; + /* in the unit of 1M */ + uint32_t fb_usage; + /* guest gfx engine usage percentage */ + uint32_t gfx_usage; + /* guest gfx engine health percentage */ + uint32_t gfx_health; + /* guest compute engine usage percentage */ + uint32_t compute_usage; + /* guest compute engine health percentage */ + uint32_t compute_health; + /* guest vce engine usage percentage. 0xffff means N/A. */ + uint32_t vce_enc_usage; + /* guest vce engine health percentage. 0xffff means N/A. */ + uint32_t vce_enc_health; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + uint32_t uvd_enc_usage; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + uint32_t uvd_enc_health; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)]; +} __aligned(4); + +#define AMDGPU_FW_VRAM_VF2PF_VER 2 +typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; + +#define AMDGPU_FW_VRAM_VF2PF_WRITE(adev, field, val) \ + do { \ + ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field = (val); \ + } while (0) + +#define AMDGPU_FW_VRAM_VF2PF_READ(adev, field, val) \ + do { \ + (*val) = ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field; \ + } while (0) + +#define AMDGPU_FW_VRAM_PF2VF_READ(adev, field, val) \ + do { \ + if (!adev->virt.fw_reserve.p_pf2vf) \ + *(val) = 0; \ + else { \ + if (adev->virt.fw_reserve.p_pf2vf->version == 1) \ + *(val) = ((struct amdgim_pf2vf_info_v1 *)adev->virt.fw_reserve.p_pf2vf)->field; \ + if (adev->virt.fw_reserve.p_pf2vf->version == 2) \ + *(val) = ((struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf)->field; \ + } \ + } while (0) + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -72,6 +245,7 @@ struct amdgpu_virt { struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; + struct amdgpu_virt_fw_reserve fw_reserve; }; #define AMDGPU_CSA_SIZE (8 * 1024) @@ -114,5 +288,9 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); +int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, + unsigned int key, + unsigned int chksum); +void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2812d88a8bdd..b4906d2f30d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -183,6 +183,12 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); return r; } + /* Retrieve checksum from mailbox2 */ + if (req == IDH_REQ_GPU_INIT_ACCESS) { + adev->virt.fw_reserve.checksum_key = + RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2)); + } } return 0; -- cgit From 6e13bdf6b2d71ab2366a9f87c99d11963aed3bad Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 18 Oct 2017 17:19:42 +0800 Subject: drm/amdgpu: fix regresstion on SR-IOV gpu reset failed fw ucode is corrupted after vf flr by PSP so ucode_init() is a must in psp_hw_init othewise KIQ/KCQ enabling will fail Revert "drm/amdgpu: refine code delete duplicated error handling" This reverts commit e57b87ff828f95efe992468e6d18c2c059b27aa9. Revert "drm/amdgpu: move amdgpu_ucode_init_bo to amdgpu_device.c" This reverts commit 815b8f8595148d06a64d2ce4282e8e80dfcb02f1. Reviewed-by: Monk Liu Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 -------- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 9 +++++++++ 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9cdaba4af216..0731b4f9b25c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1679,7 +1679,6 @@ static int amdgpu_init(struct amdgpu_device *adev) return r; } adev->ip_blocks[i].status.sw = true; - /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { r = amdgpu_vram_scratch_init(adev); @@ -1710,11 +1709,6 @@ static int amdgpu_init(struct amdgpu_device *adev) } } - mutex_lock(&adev->firmware.mutex); - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - amdgpu_ucode_init_bo(adev); - mutex_unlock(&adev->firmware.mutex); - for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.sw) continue; @@ -1850,8 +1844,6 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - amdgpu_ucode_fini_bo(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 3b42f407971d..5f5aa5fddc16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -145,6 +145,8 @@ static int amdgpu_pp_hw_init(void *handle) int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_init_bo(adev); if (adev->powerplay.ip_funcs->hw_init) ret = adev->powerplay.ip_funcs->hw_init( @@ -162,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_fini_bo(adev); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index f1035a689d35..447d446b5015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -411,6 +411,13 @@ static int psp_hw_init(void *handle) return 0; mutex_lock(&adev->firmware.mutex); + /* + * This sequence is just used on hw_init only once, no need on + * resume. + */ + ret = amdgpu_ucode_init_bo(adev); + if (ret) + goto failed; ret = psp_load_fw(adev); if (ret) { @@ -435,6 +442,8 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + amdgpu_ucode_fini_bo(adev); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); -- cgit From f993d628a2d3cb5e0a82a5284b24cef745f42b41 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 16 Oct 2017 19:46:01 +0800 Subject: drm/amdgpu:don't check soft_reset for sriov MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Monk Liu Ack-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0731b4f9b25c..3a6ce6386ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2625,6 +2625,9 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) int i; bool asic_hang = false; + if (amdgpu_sriov_vf(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; -- cgit From 43ca8efa46d9b1c4defa1b27c4dd1ef3866aaad9 Mon Sep 17 00:00:00 2001 From: pding Date: Fri, 13 Oct 2017 15:38:35 +0800 Subject: drm/amdgpu: busywait KIQ register accessing (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register accessing is performed when IRQ is disabled. Never sleep in this function. Known issue: dead sleep in many use cases of index/data registers. v2: - wrap polling fence functions. - don't trigger IRQ for polling in case of wrongly fence signal. v3: - handle wrap round gracefully. - add comments for polling function v4: - don't return negative timeout confused with error code Signed-off-by: pding Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 50 ++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 30 ++++++++---------- 6 files changed, 71 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b8ba1f5ae5e7..cbcb6a153aba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -879,7 +879,7 @@ struct amdgpu_mec { struct amdgpu_kiq { u64 eop_gpu_addr; struct amdgpu_bo *eop_obj; - struct mutex ring_mutex; + spinlock_t ring_lock; struct amdgpu_ring ring; struct amdgpu_irq_src irq; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3a6ce6386ad0..0b9332e65a4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { - BUG_ON(in_interrupt()); + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_virt_kiq_rreg(adev, reg); - } if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, adev->last_mm_index = v; } - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { - BUG_ON(in_interrupt()); + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_virt_kiq_wreg(adev, reg, v); - } if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7bdedd788f5a..fb9f88ef6059 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -168,6 +168,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) return 0; } +/** + * amdgpu_fence_emit_polling - emit a fence on the requeste ring + * + * @ring: ring the fence is associated with + * @s: resulting sequence number + * + * Emits a fence command on the requested ring (all asics). + * Used For polling fence. + * Returns 0 on success, -ENOMEM on failure. + */ +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) +{ + uint32_t seq; + + if (!s) + return -EINVAL; + + seq = ++ring->fence_drv.sync_seq; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + seq, AMDGPU_FENCE_FLAG_INT); + + *s = seq; + + return 0; +} + /** * amdgpu_fence_schedule_fallback - schedule fallback check * @@ -281,6 +307,30 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) return r; } +/** + * amdgpu_fence_wait_polling - busy wait for givn sequence number + * + * @ring: ring index the fence is associated with + * @wait_seq: sequence number to wait + * @timeout: the timeout for waiting in usecs + * + * Wait for all fences on the requested ring to signal (all asics). + * Returns left time if no timeout, 0 or minus if timeout. + */ +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, + uint32_t wait_seq, + signed long timeout) +{ + uint32_t seq; + + do { + seq = amdgpu_fence_read(ring); + udelay(5); + timeout -= 5; + } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0); + + return timeout > 0 ? timeout : 0; +} /** * amdgpu_fence_count_emitted - get the count of emitted fences * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 83435ccbad44..ef043361009f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -201,7 +201,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_kiq *kiq = &adev->gfx.kiq; int r = 0; - mutex_init(&kiq->ring_mutex); + spin_lock_init(&kiq->ring_lock); r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 0d9ce141404c..b18c2b96691f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -90,8 +90,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, + uint32_t wait_seq, + signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ed7be2eb24b0..e97f80f86005 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,7 +22,7 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 100000 +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) { @@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r; - uint32_t val; - struct dma_fence *f; + uint32_t val, seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&kiq->ring_mutex); + spin_lock(&kiq->ring_lock); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg); - amdgpu_fence_emit(ring, &f); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - mutex_unlock(&kiq->ring_mutex); + spin_unlock(&kiq->ring_lock); - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); - dma_fence_put(f); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); + DRM_ERROR("wait for kiq fence error: %ld\n", r); return ~0; } - val = adev->wb.wb[adev->virt.reg_val_offs]; return val; @@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { signed long r; - struct dma_fence *f; + uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&kiq->ring_mutex); + spin_lock(&kiq->ring_lock); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit(ring, &f); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - mutex_unlock(&kiq->ring_mutex); + spin_unlock(&kiq->ring_lock); - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - dma_fence_put(f); + DRM_ERROR("wait for kiq fence error: %ld\n", r); } /** -- cgit