From db7b81545f5abdfd1f13b7f0a3f995994701cf92 Mon Sep 17 00:00:00 2001 From: zhanglianjie Date: Sat, 29 Jan 2022 15:35:23 +0800 Subject: drm/amd/amdgpu/amdgpu_uvd: Fix forgotten unmap buffer object After the buffer object is successfully mapped, call amdgpu_bo_kunmap before the function returns. Signed-off-by: zhanglianjie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6f8de11a17f1..9cc23b220537 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -834,6 +834,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, handle = msg[2]; if (handle == 0) { + amdgpu_bo_kunmap(bo); DRM_ERROR("Invalid UVD handle!\n"); return -EINVAL; } @@ -892,6 +893,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); } + amdgpu_bo_kunmap(bo); return -EINVAL; } -- cgit From a190f8dc4aaf6064527bb81c07f7cff1904dc927 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 21 Feb 2022 13:51:17 +0100 Subject: drm/amdgpu: header cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No function change, just move a bunch of definitions from amdgpu.h into separate header files. Signed-off-by: Christian König Acked-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 95 ------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 93 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 35 +++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + 10 files changed, 132 insertions(+), 100 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index bfb7ce79054a..cdf0818088b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -60,7 +60,6 @@ #include #include #include -#include #include #include "dm_pp_interface.h" @@ -277,9 +276,6 @@ extern int amdgpu_vcnfw_log; #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) struct amdgpu_device; -struct amdgpu_ib; -struct amdgpu_cs_parser; -struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; @@ -467,20 +463,6 @@ struct amdgpu_flip_work { }; -/* - * CP & rings. - */ - -struct amdgpu_ib { - struct amdgpu_sa_bo *sa_bo; - uint32_t length_dw; - uint64_t gpu_addr; - uint32_t *ptr; - uint32_t flags; -}; - -extern const struct drm_sched_backend_ops amdgpu_sched_ops; - /* * file private structure */ @@ -496,79 +478,6 @@ struct amdgpu_fpriv { int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); -int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, - enum amdgpu_ib_pool_type pool, - struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f); -int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ibs, struct amdgpu_job *job, - struct dma_fence **f); -int amdgpu_ib_pool_init(struct amdgpu_device *adev); -void amdgpu_ib_pool_fini(struct amdgpu_device *adev); -int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - -/* - * CS. - */ -struct amdgpu_cs_chunk { - uint32_t chunk_id; - uint32_t length_dw; - void *kdata; -}; - -struct amdgpu_cs_post_dep { - struct drm_syncobj *syncobj; - struct dma_fence_chain *chain; - u64 point; -}; - -struct amdgpu_cs_parser { - struct amdgpu_device *adev; - struct drm_file *filp; - struct amdgpu_ctx *ctx; - - /* chunks */ - unsigned nchunks; - struct amdgpu_cs_chunk *chunks; - - /* scheduler job object */ - struct amdgpu_job *job; - struct drm_sched_entity *entity; - - /* buffer objects */ - struct ww_acquire_ctx ticket; - struct amdgpu_bo_list *bo_list; - struct amdgpu_mn *mn; - struct amdgpu_bo_list_entry vm_pd; - struct list_head validated; - struct dma_fence *fence; - uint64_t bytes_moved_threshold; - uint64_t bytes_moved_vis_threshold; - uint64_t bytes_moved; - uint64_t bytes_moved_vis; - - /* user fence */ - struct amdgpu_bo_list_entry uf_entry; - - unsigned num_post_deps; - struct amdgpu_cs_post_dep *post_deps; -}; - -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx) -{ - return p->job->ibs[ib_idx].ptr[idx]; -} - -static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx, - uint32_t value) -{ - p->job->ibs[ib_idx].ptr[idx] = value; -} - /* * Writeback */ @@ -1439,10 +1348,6 @@ static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { retu static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } #endif -int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, - uint64_t addr, struct amdgpu_bo **bo, - struct amdgpu_bo_va_mapping **mapping); - #if defined(CONFIG_DRM_AMD_DC) int amdgpu_dm_display_resume(struct amdgpu_device *adev ); #else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e762e45b7b85..bbeabca21d95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -32,6 +32,7 @@ #include #include +#include "amdgpu_cs.h" #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_gmc.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h new file mode 100644 index 000000000000..92d07816743e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -0,0 +1,93 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_CS_H__ +#define __AMDGPU_CS_H__ + +#include "amdgpu_job.h" +#include "amdgpu_bo_list.h" +#include "amdgpu_ring.h" + +struct amdgpu_bo_va_mapping; + +struct amdgpu_cs_chunk { + uint32_t chunk_id; + uint32_t length_dw; + void *kdata; +}; + +struct amdgpu_cs_post_dep { + struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; + u64 point; +}; + +struct amdgpu_cs_parser { + struct amdgpu_device *adev; + struct drm_file *filp; + struct amdgpu_ctx *ctx; + + /* chunks */ + unsigned nchunks; + struct amdgpu_cs_chunk *chunks; + + /* scheduler job object */ + struct amdgpu_job *job; + struct drm_sched_entity *entity; + + /* buffer objects */ + struct ww_acquire_ctx ticket; + struct amdgpu_bo_list *bo_list; + struct amdgpu_mn *mn; + struct amdgpu_bo_list_entry vm_pd; + struct list_head validated; + struct dma_fence *fence; + uint64_t bytes_moved_threshold; + uint64_t bytes_moved_vis_threshold; + uint64_t bytes_moved; + uint64_t bytes_moved_vis; + + /* user fence */ + struct amdgpu_bo_list_entry uf_entry; + + unsigned num_post_deps; + struct amdgpu_cs_post_dep *post_deps; +}; + +static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, + uint32_t ib_idx, int idx) +{ + return p->job->ibs[ib_idx].ptr[idx]; +} + +static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, + uint32_t ib_idx, int idx, + uint32_t value) +{ + p->job->ibs[ib_idx].ptr[idx] = value; +} + +int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, + uint64_t addr, struct amdgpu_bo **bo, + struct amdgpu_bo_va_mapping **mapping); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 9e65730193b8..6d704772ff42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -23,6 +23,9 @@ #ifndef __AMDGPU_JOB_H__ #define __AMDGPU_JOB_H__ +#include +#include "amdgpu_sync.h" + /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means preamble IB is first presented in belonging context */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 48365da213dc..05e789fc7a9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -28,6 +28,13 @@ #include #include +struct amdgpu_device; +struct amdgpu_ring; +struct amdgpu_ib; +struct amdgpu_cs_parser; +struct amdgpu_job; +struct amdgpu_vm; + /* max number of rings */ #define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_HWIP_RINGS 8 @@ -82,11 +89,13 @@ enum amdgpu_ib_pool_type { AMDGPU_IB_POOL_MAX }; -struct amdgpu_device; -struct amdgpu_ring; -struct amdgpu_ib; -struct amdgpu_cs_parser; -struct amdgpu_job; +struct amdgpu_ib { + struct amdgpu_sa_bo *sa_bo; + uint32_t length_dw; + uint64_t gpu_addr; + uint32_t *ptr; + uint32_t flags; +}; struct amdgpu_sched { u32 num_scheds; @@ -111,6 +120,8 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +extern const struct drm_sched_backend_ops amdgpu_sched_ops; + void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); @@ -352,4 +363,18 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); + +int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned size, + enum amdgpu_ib_pool_type pool, + struct amdgpu_ib *ib); +void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, + struct dma_fence *f); +int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + struct amdgpu_ib *ibs, struct amdgpu_job *job, + struct dma_fence **f); +int amdgpu_ib_pool_init(struct amdgpu_device *adev); +void amdgpu_ib_pool_fini(struct amdgpu_device *adev); +int amdgpu_ib_ring_tests(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c index 57c6c39ba064..b96d885f6e33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c @@ -23,6 +23,7 @@ */ #include +#include "amdgpu_cs.h" #include "amdgpu.h" #define CREATE_TRACE_POINTS diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 9cc23b220537..14833615ee91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -37,6 +37,7 @@ #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_uvd.h" +#include "amdgpu_cs.h" #include "cikd.h" #include "uvd/uvd_4_2_d.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 344f711ad144..6179230b6c6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -34,6 +34,7 @@ #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_vce.h" +#include "amdgpu_cs.h" #include "cikd.h" /* 1 second timeout */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b483f03b4591..7afa660e341c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_uvd.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_common.h" diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 42f943d2d6ce..d09474a22201 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v2_0.h" -- cgit From cdc7893fc93f1969038ed333b33eac1452c8d255 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 1 Mar 2022 09:51:58 +0100 Subject: drm/amdgpu: use job and ib structures directly in CS parsers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of providing the ib index provide the job and ib pointers directly to the patch and parse functions for UVD and VCE. Also move the set/get functions for IB values to the IB declerations. Signed-off-by: Christian König Acked-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 13 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 23 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 36 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 116 ++++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 7 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 13 ++-- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 25 ++++--- 9 files changed, 130 insertions(+), 114 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index bbeabca21d95..59c5dda3378e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -783,12 +783,15 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); - r = amdgpu_ring_parse_cs(ring, p, j); + r = amdgpu_ring_parse_cs(ring, p, p->job, + &p->job->ibs[i]); if (r) return r; } else { ib->ptr = (uint32_t *)kptr; - r = amdgpu_ring_patch_cs_in_place(ring, p, j); + r = amdgpu_ring_patch_cs_in_place(ring, p, + p->job, + &p->job->ibs[i]); amdgpu_bo_kunmap(aobj); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index 92d07816743e..30ecc4917f81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -73,19 +73,6 @@ struct amdgpu_cs_parser { struct amdgpu_cs_post_dep *post_deps; }; -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx) -{ - return p->job->ibs[ib_idx].ptr[idx]; -} - -static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx, - uint32_t value) -{ - p->job->ibs[ib_idx].ptr[idx] = value; -} - int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo, struct amdgpu_bo_va_mapping **mapping); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 05e789fc7a9e..a8bed1b47899 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -163,8 +163,12 @@ struct amdgpu_ring_funcs { u64 (*get_wptr)(struct amdgpu_ring *ring); void (*set_wptr)(struct amdgpu_ring *ring); /* validating and patching of IBs */ - int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); - int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx); + int (*parse_cs)(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib); + int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib); /* constants to calculate how many DW are needed for an emit */ unsigned emit_frame_size; unsigned emit_ib_size; @@ -264,8 +268,8 @@ struct amdgpu_ring { atomic_t *sched_score; }; -#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) -#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) +#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) +#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) @@ -364,6 +368,17 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); +static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx) +{ + return ib->ptr[idx]; +} + +static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx, + uint32_t value) +{ + ib->ptr[idx] = value; +} + int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, enum amdgpu_ib_pool_type pool, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 14833615ee91..39c74d9fa7cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -99,7 +99,7 @@ struct amdgpu_uvd_cs_ctx { unsigned reg, count; unsigned data0, data1; unsigned idx; - unsigned ib_idx; + struct amdgpu_ib *ib; /* does the IB has a msg command */ bool has_msg_cmd; @@ -558,8 +558,8 @@ static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx) uint32_t lo, hi; uint64_t addr; - lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0); - hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1); + lo = amdgpu_ib_get_value(ctx->ib, ctx->data0); + hi = amdgpu_ib_get_value(ctx->ib, ctx->data1); addr = ((uint64_t)lo) | (((uint64_t)hi) << 32); return addr; @@ -590,7 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) if (!ctx->parser->adev->uvd.address_64_bit) { /* check if it's a message or feedback command */ - cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; + cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1; if (cmd == 0x0 || cmd == 0x3) { /* yes, force it into VRAM */ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; @@ -928,12 +928,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; start += addr; - amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0, - lower_32_bits(start)); - amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1, - upper_32_bits(start)); + amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start)); + amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start)); - cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; + cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1; if (cmd < 0x4) { if ((end - start) < ctx->buf_sizes[cmd]) { DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, @@ -993,14 +991,13 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) { - struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; int i, r; ctx->idx++; for (i = 0; i <= ctx->count; ++i) { unsigned reg = ctx->reg + i; - if (ctx->idx >= ib->length_dw) { + if (ctx->idx >= ctx->ib->length_dw) { DRM_ERROR("Register command after end of CS!\n"); return -EINVAL; } @@ -1040,11 +1037,10 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) { - struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; int r; - for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) { - uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx); + for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) { + uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx); unsigned type = CP_PACKET_GET_TYPE(cmd); switch (type) { case PACKET_TYPE0: @@ -1069,11 +1065,14 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, * amdgpu_uvd_ring_parse_cs - UVD command submission parser * * @parser: Command submission parser context - * @ib_idx: Which indirect buffer to use + * @job: the job to parse + * @ib: the IB to patch * * Parse the command stream, patch in addresses as necessary. */ -int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) +int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) { struct amdgpu_uvd_cs_ctx ctx = {}; unsigned buf_sizes[] = { @@ -1083,10 +1082,9 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) [0x00000003] = 2048, [0x00000004] = 0xFFFFFFFF, }; - struct amdgpu_ib *ib = &parser->job->ibs[ib_idx]; int r; - parser->job->vm = NULL; + job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); if (ib->length_dw % 16) { @@ -1097,7 +1095,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) ctx.parser = parser; ctx.buf_sizes = buf_sizes; - ctx.ib_idx = ib_idx; + ctx.ib = ib; /* first round only required on chips without UVD 64 bit address support */ if (!parser->adev->uvd.address_64_bit) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 76ac9699885d..9f89bb7cd60b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -82,7 +82,9 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence); void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx); +int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring); int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 6179230b6c6e..02cb3a12dd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -588,8 +588,7 @@ err: /** * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary * - * @p: parser context - * @ib_idx: indirect buffer to use + * @ib: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size @@ -597,8 +596,9 @@ err: * * Make sure that no BO cross a 4GB boundary. */ -static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, - int lo, int hi, unsigned size, int32_t index) +static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, + struct amdgpu_ib *ib, int lo, int hi, + unsigned size, int32_t index) { int64_t offset = ((uint64_t)size) * ((int64_t)index); struct ttm_operation_ctx ctx = { false, false }; @@ -608,8 +608,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, uint64_t addr; int r; - addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | - ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | + ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; if (index >= 0) { addr += offset; fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT; @@ -639,7 +639,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, * amdgpu_vce_cs_reloc - command submission relocation * * @p: parser context - * @ib_idx: indirect buffer to use + * @ib: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size @@ -647,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, * * Patch relocation inside command stream with real buffer address */ -static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, +static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, int lo, int hi, unsigned size, uint32_t index) { struct amdgpu_bo_va_mapping *mapping; @@ -658,8 +658,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, if (index == 0xffffffff) index = 0; - addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | - ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | + ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; addr += ((uint64_t)size) * ((uint64_t)index); r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); @@ -680,8 +680,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, addr += amdgpu_bo_gpu_offset(bo); addr -= ((uint64_t)size) * ((uint64_t)index); - amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr)); - amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr)); + amdgpu_ib_set_value(ib, lo, lower_32_bits(addr)); + amdgpu_ib_set_value(ib, hi, upper_32_bits(addr)); return 0; } @@ -730,11 +730,13 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, * amdgpu_vce_ring_parse_cs - parse and validate the command stream * * @p: parser context - * @ib_idx: indirect buffer to use + * @job: the job to parse + * @ib: the IB to patch */ -int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) +int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; unsigned fb_idx = 0, bs_idx = 0; int session_idx = -1; uint32_t destroyed = 0; @@ -745,12 +747,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) unsigned idx; int i, r = 0; - p->job->vm = NULL; + job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); for (idx = 0; idx < ib->length_dw;) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); @@ -760,52 +762,52 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) switch (cmd) { case 0x00000002: /* task info */ - fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); - bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + fb_idx = amdgpu_ib_get_value(ib, idx + 6); + bs_idx = amdgpu_ib_get_value(ib, idx + 7); break; case 0x03000001: /* encode */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10, - idx + 9, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9, + 0, 0); if (r) goto out; - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12, - idx + 11, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11, + 0, 0); if (r) goto out; break; case 0x05000001: /* context buffer */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, - idx + 2, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + 0, 0); if (r) goto out; break; case 0x05000004: /* video bitstream buffer */ - tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2, + tmp = amdgpu_ib_get_value(ib, idx + 4); + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, tmp, bs_idx); if (r) goto out; break; case 0x05000005: /* feedback buffer */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, 4096, fb_idx); if (r) goto out; break; case 0x0500000d: /* MV buffer */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, - idx + 2, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + 0, 0); if (r) goto out; - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8, - idx + 7, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7, + 0, 0); if (r) goto out; break; @@ -815,12 +817,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) } for (idx = 0; idx < ib->length_dw;) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); switch (cmd) { case 0x00000001: /* session */ - handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + handle = amdgpu_ib_get_value(ib, idx + 2); session_idx = amdgpu_vce_validate_handle(p, handle, &allocated); if (session_idx < 0) { @@ -831,8 +833,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x00000002: /* task info */ - fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); - bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + fb_idx = amdgpu_ib_get_value(ib, idx + 6); + bs_idx = amdgpu_ib_get_value(ib, idx + 7); break; case 0x01000001: /* create */ @@ -847,8 +849,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) goto out; } - *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * - amdgpu_get_ib_value(p, ib_idx, idx + 10) * + *size = amdgpu_ib_get_value(ib, idx + 8) * + amdgpu_ib_get_value(ib, idx + 10) * 8 * 3 / 2; break; @@ -877,12 +879,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x03000001: /* encode */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, + r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9, *size, 0); if (r) goto out; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, + r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11, *size / 3, 0); if (r) goto out; @@ -893,35 +895,35 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x05000001: /* context buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, *size * 2, 0); if (r) goto out; break; case 0x05000004: /* video bitstream buffer */ - tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + tmp = amdgpu_ib_get_value(ib, idx + 4); + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, tmp, bs_idx); if (r) goto out; break; case 0x05000005: /* feedback buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, 4096, fb_idx); if (r) goto out; break; case 0x0500000d: /* MV buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, - idx + 2, *size, 0); + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, + idx + 2, *size, 0); if (r) goto out; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8, - idx + 7, *size / 12, 0); + r = amdgpu_vce_cs_reloc(p, ib, idx + 8, + idx + 7, *size / 12, 0); if (r) goto out; break; @@ -966,11 +968,13 @@ out: * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode * * @p: parser context - * @ib_idx: indirect buffer to use + * @job: the job to parse + * @ib: the IB to patch */ -int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) +int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; int session_idx = -1; uint32_t destroyed = 0; uint32_t created = 0; @@ -979,8 +983,8 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) int i, r = 0, idx = 0; while (idx < ib->length_dw) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); @@ -990,7 +994,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) switch (cmd) { case 0x00000001: /* session */ - handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + handle = amdgpu_ib_get_value(ib, idx + 2); session_idx = amdgpu_vce_validate_handle(p, handle, &allocated); if (session_idx < 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index be4a6e773c5b..ea680fc9a6c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -59,8 +59,11 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); -int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); +int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + struct amdgpu_ib *ib); +int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 7afa660e341c..2f15b8e0f7d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1276,14 +1276,15 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) * uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission. * * @p: the CS parser with the IBs - * @ib_idx: which IB to patch + * @job: which job this ib is in + * @ib: which IB to patch * */ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - uint32_t ib_idx) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); unsigned i; /* No patching necessary for the first instance */ @@ -1291,12 +1292,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; for (i = 0; i < ib->length_dw; i += 2) { - uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); + uint32_t reg = amdgpu_ib_get_value(ib, i); reg -= p->adev->reg_offset[UVD_HWIP][0][1]; reg += p->adev->reg_offset[UVD_HWIP][1][1]; - amdgpu_set_ib_value(p, ib_idx, i, reg); + amdgpu_ib_set_value(ib, i, reg); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index d09474a22201..5dbf5ba7d62d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1812,21 +1812,23 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&p->entity->fence_seq)) + if (atomic_read(&job->base.entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(p->entity, scheds, 1); + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1897,7 +1899,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p); + r = vcn_v3_0_limit_sched(p, job); if (r) goto out; } @@ -1908,10 +1910,10 @@ out: } static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - uint32_t ib_idx) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1921,8 +1923,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; for (i = 0; i < ib->length_dw; i += 2) { - uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); - uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1); + uint32_t reg = amdgpu_ib_get_value(ib, i); + uint32_t val = amdgpu_ib_get_value(ib, i + 1); if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { msg_lo = val; @@ -1930,7 +1932,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, job, + ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } -- cgit From 7bc80a5462c37eab58a9ea386064307c0f447fd1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 9 Nov 2021 11:08:18 +0100 Subject: dma-buf: add enum dma_resv_usage v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds the dma_resv_usage enum and allows us to specify why a dma_resv object is queried for its containing fences. Additional to that a dma_resv_usage_rw() helper function is added to aid retrieving the fences for a read or write userspace submission. This is then deployed to the different query functions of the dma_resv object and all of their users. When the write paratermer was previously true we now use DMA_RESV_USAGE_WRITE and DMA_RESV_USAGE_READ otherwise. v2: add KERNEL/OTHER in separate patch v3: some kerneldoc suggestions by Daniel v4: some more kerneldoc suggestions by Daniel, fix missing cases lost in the rebase pointed out by Bas. Signed-off-by: Christian König Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-2-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 6 +- drivers/dma-buf/dma-resv.c | 35 +++++------ drivers/dma-buf/st-dma-resv.c | 48 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +- drivers/gpu/drm/drm_gem.c | 3 +- drivers/gpu/drm/drm_gem_atomic_helper.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 6 +- drivers/gpu/drm/i915/display/intel_atomic_plane.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 6 +- .../gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 3 +- drivers/gpu/drm/i915/i915_request.c | 3 +- drivers/gpu/drm/i915/i915_sw_fence.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 3 +- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 3 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 8 +-- drivers/gpu/drm/nouveau/nouveau_fence.c | 8 ++- drivers/gpu/drm/nouveau/nouveau_gem.c | 3 +- drivers/gpu/drm/panfrost/panfrost_drv.c | 3 +- drivers/gpu/drm/qxl/qxl_debugfs.c | 3 +- drivers/gpu/drm/radeon/radeon_display.c | 3 +- drivers/gpu/drm/radeon/radeon_gem.c | 9 ++- drivers/gpu/drm/radeon/radeon_mn.c | 4 +- drivers/gpu/drm/radeon/radeon_sync.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 4 +- drivers/gpu/drm/scheduler/sched_main.c | 3 +- drivers/gpu/drm/ttm/ttm_bo.c | 18 +++--- drivers/gpu/drm/vgem/vgem_fence.c | 4 +- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 5 +- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 3 +- drivers/infiniband/core/umem_dmabuf.c | 3 +- include/linux/dma-buf.h | 8 ++- include/linux/dma-resv.h | 73 ++++++++++++++++++---- 46 files changed, 215 insertions(+), 126 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c') diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 775d3afb4169..1cddb65eafda 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -216,7 +216,8 @@ static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write, struct dma_fence *fence; int r; - dma_resv_for_each_fence(&cursor, resv, write, fence) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(write), + fence) { dma_fence_get(fence); r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); if (!r) @@ -1124,7 +1125,8 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf, long ret; /* Wait on any implicit rendering fences */ - ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT); + ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write), + true, MAX_SCHEDULE_TIMEOUT); if (ret < 0) return ret; diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 8c650b96357a..17237e6ee30c 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -384,7 +384,7 @@ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) cursor->seq = read_seqcount_begin(&cursor->obj->seq); cursor->index = -1; cursor->shared_count = 0; - if (cursor->all_fences) { + if (cursor->usage >= DMA_RESV_USAGE_READ) { cursor->fences = dma_resv_shared_list(cursor->obj); if (cursor->fences) cursor->shared_count = cursor->fences->shared_count; @@ -496,7 +496,7 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor) dma_resv_assert_held(cursor->obj); cursor->index = 0; - if (cursor->all_fences) + if (cursor->usage >= DMA_RESV_USAGE_READ) cursor->fences = dma_resv_shared_list(cursor->obj); else cursor->fences = NULL; @@ -551,7 +551,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) list = NULL; excl = NULL; - dma_resv_iter_begin(&cursor, src, true); + dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, f) { if (dma_resv_iter_is_restarted(&cursor)) { @@ -597,7 +597,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object - * @write: true if we should return all fences + * @usage: controls which fences to include, see enum dma_resv_usage. * @num_fences: the number of fences returned * @fences: the array of fence ptrs returned (array is krealloc'd to the * required size, and must be freed by caller) @@ -605,7 +605,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * Retrieve all fences from the reservation object. * Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, bool write, +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences) { struct dma_resv_iter cursor; @@ -614,7 +614,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write, *num_fences = 0; *fences = NULL; - dma_resv_iter_begin(&cursor, obj, write); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) { @@ -646,7 +646,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences); /** * dma_resv_get_singleton - Get a single fence for all the fences * @obj: the reservation object - * @write: true if we should return all fences + * @usage: controls which fences to include, see enum dma_resv_usage. * @fence: the resulting fence * * Get a single fence representing all the fences inside the resv object. @@ -658,7 +658,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences); * * Returns 0 on success and negative error values on failure. */ -int dma_resv_get_singleton(struct dma_resv *obj, bool write, +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, struct dma_fence **fence) { struct dma_fence_array *array; @@ -666,7 +666,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, bool write, unsigned count; int r; - r = dma_resv_get_fences(obj, write, &count, &fences); + r = dma_resv_get_fences(obj, usage, &count, &fences); if (r) return r; @@ -700,7 +700,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton); * dma_resv_wait_timeout - Wait on reservation's objects * shared and/or exclusive fences. * @obj: the reservation object - * @wait_all: if true, wait on all fences, else wait on just exclusive fence + * @usage: controls which fences to include, see enum dma_resv_usage. * @intr: if true, do interruptible wait * @timeout: timeout value in jiffies or zero to return immediately * @@ -710,14 +710,14 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton); * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or * greater than zer on success. */ -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout) +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, + bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj, wait_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { ret = dma_fence_wait_timeout(fence, intr, ret); @@ -737,8 +737,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. * @obj: the reservation object - * @test_all: if true, test all fences, otherwise only test the exclusive - * fence + * @usage: controls which fences to include, see enum dma_resv_usage. * * Callers are not required to hold specific locks, but maybe hold * dma_resv_lock() already. @@ -747,12 +746,12 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * * True if all fences signaled, else false. */ -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj, test_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { dma_resv_iter_end(&cursor); return false; @@ -775,7 +774,7 @@ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, obj, true, fence) { + dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { seq_printf(seq, "\t%s fence:", dma_resv_iter_is_exclusive(&cursor) ? "Exclusive" : "Shared"); diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d2e61f6ae989..d097981061b1 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -58,7 +58,7 @@ static int sanitycheck(void *arg) return r; } -static int test_signaling(void *arg, bool shared) +static int test_signaling(void *arg, enum dma_resv_usage usage) { struct dma_resv resv; struct dma_fence *f; @@ -81,18 +81,18 @@ static int test_signaling(void *arg, bool shared) goto err_unlock; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); - if (dma_resv_test_signaled(&resv, shared)) { + if (dma_resv_test_signaled(&resv, usage)) { pr_err("Resv unexpectedly signaled\n"); r = -EINVAL; goto err_unlock; } dma_fence_signal(f); - if (!dma_resv_test_signaled(&resv, shared)) { + if (!dma_resv_test_signaled(&resv, usage)) { pr_err("Resv not reporting signaled\n"); r = -EINVAL; goto err_unlock; @@ -107,15 +107,15 @@ err_free: static int test_excl_signaling(void *arg) { - return test_signaling(arg, false); + return test_signaling(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_signaling(void *arg) { - return test_signaling(arg, true); + return test_signaling(arg, DMA_RESV_USAGE_READ); } -static int test_for_each(void *arg, bool shared) +static int test_for_each(void *arg, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *f, *fence; @@ -139,13 +139,13 @@ static int test_for_each(void *arg, bool shared) goto err_unlock; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); r = -ENOENT; - dma_resv_for_each_fence(&cursor, &resv, shared, fence) { + dma_resv_for_each_fence(&cursor, &resv, usage, fence) { if (!r) { pr_err("More than one fence found\n"); r = -EINVAL; @@ -156,7 +156,8 @@ static int test_for_each(void *arg, bool shared) r = -EINVAL; goto err_unlock; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_is_exclusive(&cursor) != + (usage >= DMA_RESV_USAGE_READ)) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_unlock; @@ -178,15 +179,15 @@ err_free: static int test_excl_for_each(void *arg) { - return test_for_each(arg, false); + return test_for_each(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_for_each(void *arg) { - return test_for_each(arg, true); + return test_for_each(arg, DMA_RESV_USAGE_READ); } -static int test_for_each_unlocked(void *arg, bool shared) +static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *f, *fence; @@ -211,14 +212,14 @@ static int test_for_each_unlocked(void *arg, bool shared) goto err_free; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); dma_resv_unlock(&resv); r = -ENOENT; - dma_resv_iter_begin(&cursor, &resv, shared); + dma_resv_iter_begin(&cursor, &resv, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!r) { pr_err("More than one fence found\n"); @@ -234,7 +235,8 @@ static int test_for_each_unlocked(void *arg, bool shared) r = -EINVAL; goto err_iter_end; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_is_exclusive(&cursor) != + (usage >= DMA_RESV_USAGE_READ)) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_iter_end; @@ -262,15 +264,15 @@ err_free: static int test_excl_for_each_unlocked(void *arg) { - return test_for_each_unlocked(arg, false); + return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_for_each_unlocked(void *arg) { - return test_for_each_unlocked(arg, true); + return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ); } -static int test_get_fences(void *arg, bool shared) +static int test_get_fences(void *arg, enum dma_resv_usage usage) { struct dma_fence *f, **fences = NULL; struct dma_resv resv; @@ -294,13 +296,13 @@ static int test_get_fences(void *arg, bool shared) goto err_resv; } - if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); dma_resv_unlock(&resv); - r = dma_resv_get_fences(&resv, shared, &i, &fences); + r = dma_resv_get_fences(&resv, usage, &i, &fences); if (r) { pr_err("get_fences failed\n"); goto err_free; @@ -324,12 +326,12 @@ err_resv: static int test_excl_get_fences(void *arg) { - return test_get_fences(arg, false); + return test_get_fences(arg, DMA_RESV_USAGE_WRITE); } static int test_shared_get_fences(void *arg) { - return test_get_fences(arg, true); + return test_get_fences(arg, DMA_RESV_USAGE_READ); } int dma_resv(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e85e347eb670..413f32c3fd63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1288,7 +1288,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, * * TODO: Remove together with dma_resv rework. */ - dma_resv_for_each_fence(&cursor, resv, false, fence) { + dma_resv_for_each_fence(&cursor, resv, + DMA_RESV_USAGE_WRITE, + fence) { break; } dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index fae5c1debfad..7a6908d71820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -200,8 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - /* TODO: Unify this with other drivers */ - r = dma_resv_get_fences(new_abo->tbo.base.resv, true, + r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE, &work->shared_count, &work->shared); if (unlikely(r != 0)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 57b74d35052f..84a53758e18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, timeout); /* ret == 0 means not signaled, * ret > 0 means signaled diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 81207737c716..65998cbcd7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, struct dma_fence *fence; int r; - r = dma_resv_get_singleton(resv, true, &fence); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence); if (r) goto fallback; @@ -139,7 +139,8 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. */ - dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4b153daf283d..86f5248676b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6f57a2fd5fe3..a7f39f8ab7be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -768,8 +768,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 40e06745fae9..744e144e5fc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -259,7 +259,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - dma_resv_for_each_fence(&cursor, resv, true, f) { + /* TODO: Use DMA_RESV_USAGE_READ here */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f7f149588432..5db5066e74b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1344,7 +1344,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, + DMA_RESV_USAGE_READ, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 39c74d9fa7cc..3654326219e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1163,7 +1163,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(bo->tbo.base.resv, + DMA_RESV_USAGE_WRITE, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b13451255e8b..a0376fd36a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2059,7 +2059,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, resv, true, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2665,7 +2665,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ)) return false; /* Try to block ongoing updates */ @@ -2845,7 +2845,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true, + timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, + DMA_RESV_USAGE_READ, true, timeout); if (timeout <= 0) return timeout; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b30656959fd8..9e24b1e616af 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9236,7 +9236,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * deadlock during GPU reset when this fence will not signal * but we hold reservation lock for the BO. */ - r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(abo->tbo.base.resv, + DMA_RESV_USAGE_WRITE, false, msecs_to_jiffies(5000)); if (unlikely(r <= 0)) DRM_ERROR("Waiting for fences timed out!"); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 133dfae06fab..eb0c2d041f13 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -771,7 +771,8 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, return -EINVAL; } - ret = dma_resv_wait_timeout(obj->resv, wait_all, true, timeout); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all), + true, timeout); if (ret == 0) ret = -ETIME; else if (ret > 0) diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index 9338ddb7edff..a6d89aed0bda 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -151,7 +151,7 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0; obj = drm_gem_fb_get_obj(state->fb, 0); - ret = dma_resv_get_singleton(obj->resv, false, &fence); + ret = dma_resv_get_singleton(obj->resv, DMA_RESV_USAGE_WRITE, &fence); if (ret) return ret; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index d5314aa28ff7..507172e2780b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -380,12 +380,14 @@ int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op, } if (op & ETNA_PREP_NOSYNC) { - if (!dma_resv_test_signaled(obj->resv, write)) + if (!dma_resv_test_signaled(obj->resv, + dma_resv_usage_rw(write))) return -EBUSY; } else { unsigned long remain = etnaviv_timeout_to_jiffies(timeout); - ret = dma_resv_wait_timeout(obj->resv, write, true, remain); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write), + true, remain); if (ret <= 0) return ret == 0 ? -ETIMEDOUT : ret; } diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 5712688232fb..03e86e836a17 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -997,7 +997,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb; - dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_iter_begin(&cursor, obj->base.resv, + DMA_RESV_USAGE_WRITE); dma_resv_for_each_fence_unlocked(&cursor, fence) { add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 470fdfd61a0f..14a1c0ad8c3c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -138,12 +138,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * Alternatively, we can trade that extra information on read/write * activity with * args->busy = - * !dma_resv_test_signaled(obj->resv, true); + * !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ); * to report the overall busyness. This is what the wait-ioctl does. * */ args->busy = 0; - dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) args->busy = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 444f8268b9c5..a200d3e66573 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) struct intel_memory_region *mr = READ_ONCE(obj->mm.region); #ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) && + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) && i915_gem_object_evictable(obj)); #endif return mr && (mr->type == INTEL_MEMORY_LOCAL || diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 6d1a71d6404c..644fe237601c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -86,7 +86,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, return true; /* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout(obj->base.resv, true, false, + r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index dab3d30c09a0..319936f91ac5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -40,7 +40,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1; - dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL); + dma_resv_iter_begin(&cursor, resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) { ret = i915_gem_object_wait_fence(fence, flags, timeout); if (ret <= 0) @@ -117,7 +118,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL); + dma_resv_iter_begin(&cursor, obj->base.resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) i915_gem_fence_wait_priority(fence, attr); dma_resv_iter_end(&cursor); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index b071a58dd6da..b4275b55e5b8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -219,7 +219,8 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_detach; } - timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ); + timeout = dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_WRITE, + true, 5 * HZ); if (!timeout) { pr_err("dmabuf wait for exclusive fence timed out.\n"); timeout = -ETIME; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 582770360ad1..73d5195146b0 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1598,7 +1598,8 @@ i915_request_await_object(struct i915_request *to, struct dma_fence *fence; int ret = 0; - dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) { + dma_resv_for_each_fence(&cursor, obj->base.resv, + dma_resv_usage_rw(write), fence) { ret = i915_request_await_dma_fence(to, fence); if (ret) break; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 2a74a9a1cafe..ae984c66c48a 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -585,7 +585,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp)); - dma_resv_iter_begin(&cursor, resv, write); + dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write)); dma_resv_for_each_fence_unlocked(&cursor, f) { pending = i915_sw_fence_await_dma_fence(fence, f, timeout, gfp); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 02b9ae65a96a..01bbb5f2d462 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -848,7 +848,8 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout); long ret; - ret = dma_resv_wait_timeout(obj->resv, write, true, remain); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write), + true, remain); if (ret == 0) return remain == 0 ? -EBUSY : -ETIMEDOUT; else if (ret < 0) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index e2faf92e4831..8642b84ea20c 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -558,7 +558,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) asyw->image.handle[0] = ctxdma->object.handle; } - ret = dma_resv_get_singleton(nvbo->bo.base.resv, false, + ret = dma_resv_get_singleton(nvbo->bo.base.resv, + DMA_RESV_USAGE_WRITE, &asyw->state.fence); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 74f8652d2bd3..c6bb4dbcd735 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -962,11 +962,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, struct dma_fence *fence; int ret; - /* TODO: This is actually a memory management dependency */ - ret = dma_resv_get_singleton(bo->base.resv, false, &fence); + ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE, + &fence); if (ret) - dma_resv_wait_timeout(bo->base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); nv10_bo_put_tile_region(dev, *old_tile, fence); *old_tile = new_tile; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 0268259e97eb..d5e81ccee01c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -350,14 +350,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, if (ret) return ret; - /* Waiting for the exclusive fence first causes performance regressions - * under some circumstances. So manually wait for the shared ones first. + /* Waiting for the writes first causes performance regressions + * under some circumstances. So manually wait for the reads first. */ for (i = 0; i < 2; ++i) { struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, resv, exclusive, fence) { + dma_resv_for_each_fence(&cursor, resv, + dma_resv_usage_rw(exclusive), + fence) { struct nouveau_fence *f; if (i == 0 && dma_resv_iter_is_exclusive(&cursor)) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9416bee92141..fab542a758ff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -962,7 +962,8 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, return -ENOENT; nvbo = nouveau_gem_object(gem); - lret = dma_resv_wait_timeout(nvbo->bo.base.resv, write, true, + lret = dma_resv_wait_timeout(nvbo->bo.base.resv, + dma_resv_usage_rw(write), true, no_wait ? 0 : 30 * HZ); if (!lret) ret = -EBUSY; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 94b6f0a19c83..7fcbc2a5b6cd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -316,7 +316,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, if (!gem_obj) return -ENOENT; - ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout); + ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ, + true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY; diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index 6a36b0fd845c..33e5889d6608 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -61,7 +61,8 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data) struct dma_fence *fence; int rel = 0; - dma_resv_iter_begin(&cursor, bo->tbo.base.resv, true); + dma_resv_iter_begin(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) rel = 0; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index f60e826cd292..57ff2b723c87 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -533,7 +533,8 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - r = dma_resv_get_singleton(new_rbo->tbo.base.resv, false, &work->fence); + r = dma_resv_get_singleton(new_rbo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + &work->fence); if (r) { radeon_bo_unreserve(new_rbo); DRM_ERROR("failed to get new rbo buffer fences\n"); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index f563284a7fac..6616a828f40b 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -162,7 +162,9 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, } if (domain == RADEON_GEM_DOMAIN_CPU) { /* Asking for cpu access wait for object idle */ - r = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); + r = dma_resv_wait_timeout(robj->tbo.base.resv, + DMA_RESV_USAGE_READ, + true, 30 * HZ); if (!r) r = -EBUSY; @@ -524,7 +526,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); - r = dma_resv_test_signaled(robj->tbo.base.resv, true); + r = dma_resv_test_signaled(robj->tbo.base.resv, DMA_RESV_USAGE_READ); if (r == 0) r = -EBUSY; else @@ -553,7 +555,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, 30 * HZ); if (ret == 0) r = -EBUSY; else if (ret < 0) diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 9fa88549c89e..68ebeb1bdfff 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -66,8 +66,8 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn, return true; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index b991ba1bcd51..49bbb2266c0f 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_fence *f; int r = 0; - dma_resv_for_each_fence(&cursor, resv, shared, f) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(shared), f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index bc0f44299bb9..a50750740ab0 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -478,8 +478,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, return -EINVAL; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) { DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r); return r ? r : -ETIME; diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index c5660b066554..76fd2904c7c6 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -705,7 +705,8 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, dma_resv_assert_held(obj->resv); - dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { + dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write), + fence) { /* Make sure to grab an additional ref on the added fence */ dma_fence_get(fence); ret = drm_sched_job_add_dependency(job, fence); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c49996cf25d0..cff05b62f3f7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -223,7 +223,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, resv, true); + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); @@ -252,7 +252,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, struct dma_resv *resv = &bo->base._resv; int ret; - if (dma_resv_test_signaled(resv, true)) + if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ)) ret = 0; else ret = -EBUSY; @@ -264,7 +264,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, dma_resv_unlock(bo->base.resv); spin_unlock(&bo->bdev->lru_lock); - lret = dma_resv_wait_timeout(resv, true, interruptible, + lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + interruptible, 30 * HZ); if (lret < 0) @@ -367,7 +368,8 @@ static void ttm_bo_release(struct kref *kref) /* Last resort, if we fail to allocate memory for the * fences block for the BO to become idle */ - dma_resv_wait_timeout(bo->base.resv, true, false, + dma_resv_wait_timeout(bo->base.resv, + DMA_RESV_USAGE_READ, false, 30 * HZ); } @@ -378,7 +380,7 @@ static void ttm_bo_release(struct kref *kref) ttm_mem_io_free(bdev, bo->resource); } - if (!dma_resv_test_signaled(bo->base.resv, true) || + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); @@ -1044,14 +1046,14 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, long timeout = 15 * HZ; if (no_wait) { - if (dma_resv_test_signaled(bo->base.resv, true)) + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) return 0; else return -EBUSY; } - timeout = dma_resv_wait_timeout(bo->base.resv, true, interruptible, - timeout); + timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + interruptible, timeout); if (timeout < 0) return timeout; diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 2ddbebca87d9..91fc4940c65a 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -130,6 +130,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, struct vgem_file *vfile = file->driver_priv; struct dma_resv *resv; struct drm_gem_object *obj; + enum dma_resv_usage usage; struct dma_fence *fence; int ret; @@ -151,7 +152,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, /* Check for a conflicting fence */ resv = obj->resv; - if (!dma_resv_test_signaled(resv, arg->flags & VGEM_FENCE_WRITE)) { + usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE); + if (!dma_resv_test_signaled(resv, usage)) { ret = -EBUSY; goto err_fence; } diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 77743fd2c61a..f8d83358d2a0 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -518,9 +518,10 @@ static int virtio_gpu_wait_ioctl(struct drm_device *dev, void *data, return -ENOENT; if (args->flags & VIRTGPU_WAIT_NOWAIT) { - ret = dma_resv_test_signaled(obj->resv, true); + ret = dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ); } else { - ret = dma_resv_wait_timeout(obj->resv, true, true, timeout); + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, + true, timeout); } if (ret == 0) ret = -EBUSY; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index fe13aa8b4a64..b96884f7d03d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -528,8 +528,8 @@ static int vmw_user_bo_synccpu_grab(struct vmw_buffer_object *vmw_bo, if (flags & drm_vmw_synccpu_allow_cs) { long lret; - lret = dma_resv_wait_timeout(bo->base.resv, true, true, - nonblock ? 0 : + lret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + true, nonblock ? 0 : MAX_SCHEDULE_TIMEOUT); if (!lret) return -EBUSY; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 626067104751..a84d1d5628d0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1164,7 +1164,8 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, if (bo->moving) dma_fence_put(bo->moving); - return dma_resv_get_singleton(bo->base.resv, false, + return dma_resv_get_singleton(bo->base.resv, + DMA_RESV_USAGE_WRITE, &bo->moving); } diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index d32cd7538835..f9901d273b8e 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -67,7 +67,8 @@ wait_fence: * may be not up-to-date. Wait for the exporter to finish * the migration. */ - return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false, + return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, + DMA_RESV_USAGE_WRITE, false, MAX_SCHEDULE_TIMEOUT); } EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 6fb91956ab8d..a297397743a2 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -408,6 +408,9 @@ struct dma_buf { * pipelining across drivers. These do not set any fences for their * access. An example here is v4l. * + * - Driver should use dma_resv_usage_rw() when retrieving fences as + * dependency for implicit synchronization. + * * DYNAMIC IMPORTER RULES: * * Dynamic importers, see dma_buf_attachment_is_dynamic(), have @@ -423,8 +426,9 @@ struct dma_buf { * * IMPORTANT: * - * All drivers must obey the struct dma_resv rules, specifically the - * rules for updating and obeying fences. + * All drivers and memory management related functions must obey the + * struct dma_resv rules, specifically the rules for updating and + * obeying fences. See enum dma_resv_usage for further descriptions. */ struct dma_resv *resv; diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 5fa04d0fccad..92cd8023980f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -49,6 +49,53 @@ extern struct ww_class reservation_ww_class; struct dma_resv_list; +/** + * enum dma_resv_usage - how the fences from a dma_resv obj are used + * + * This enum describes the different use cases for a dma_resv object and + * controls which fences are returned when queried. + * + * An important fact is that there is the order WRITEobj = obj; - cursor->all_fences = all_fences; + cursor->usage = usage; cursor->fence = NULL; } @@ -241,7 +288,7 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * dma_resv_for_each_fence - fence iterator * @cursor: a struct dma_resv_iter pointer * @obj: a dma_resv object pointer - * @all_fences: true if all fences should be returned + * @usage: controls which fences to return * @fence: the current fence * * Iterate over the fences in a struct dma_resv object while holding the @@ -250,8 +297,8 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * valid as long as the lock is held and so no extra reference to the fence is * taken. */ -#define dma_resv_for_each_fence(cursor, obj, all_fences, fence) \ - for (dma_resv_iter_begin(cursor, obj, all_fences), \ +#define dma_resv_for_each_fence(cursor, obj, usage, fence) \ + for (dma_resv_iter_begin(cursor, obj, usage), \ fence = dma_resv_iter_first(cursor); fence; \ fence = dma_resv_iter_next(cursor)) @@ -418,14 +465,14 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); -int dma_resv_get_fences(struct dma_resv *obj, bool write, +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences); -int dma_resv_get_singleton(struct dma_resv *obj, bool write, +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, struct dma_fence **fence); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout); -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all); +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, + bool intr, unsigned long timeout); +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage); void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq); #endif /* _LINUX_RESERVATION_H */ -- cgit From c35fcfa344c7544c899610dd5e512f7d630a152c Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 4 Apr 2022 12:57:30 +0200 Subject: drm/amdgpu: use DMA_RESV_USAGE_KERNEL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wait only for kernel fences before kmap or UVD direct submission. This also makes sure that we always wait in amdgpu_bo_kmap() even when returning a cached pointer. Signed-off-by: Christian König Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20220407085946.744568-6-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a3cdf8a24377..5832c05ab10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -761,6 +761,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + kptr = amdgpu_bo_kptr(bo); if (kptr) { if (ptr) @@ -768,11 +773,6 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, - false, MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 3654326219e0..6eac649499d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1164,7 +1164,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (direct) { r = dma_resv_wait_timeout(bo->tbo.base.resv, - DMA_RESV_USAGE_WRITE, false, + DMA_RESV_USAGE_KERNEL, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; -- cgit