From 818f5c8f4cd27747e8218e8a5fb230c322e02d1e Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Wed, 25 Apr 2018 00:03:45 +0200 Subject: drm/vc4: Syncobj import support Allow userland to specify a syncobj that is waited on before a render job starts processing. v2: Use 0 as invalid syncobj to drop flag (Eric) Drop extra newline (Eric) Signed-off-by: Stefan Schake Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1524607427-12876-2-git-send-email-stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_gem.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/vc4/vc4_gem.c') diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 2107b0daf8ef..e305ccdedf47 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -1115,6 +1116,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; + struct dma_fence *in_fence; int ret = 0; if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | @@ -1125,11 +1127,6 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (args->pad2 != 0) { - DRM_DEBUG("->pad2 must be set to zero\n"); - return -EINVAL; - } - exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); @@ -1164,6 +1161,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } } + if (args->in_sync) { + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + &in_fence); + if (ret) + goto fail; + + /* When the fence (or fence array) is exclusively from our + * context we can skip the wait since jobs are executed in + * order of their submission through this ioctl and this can + * only have fences from a prior job. + */ + if (!dma_fence_match_context(in_fence, + vc4->dma_fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) { + dma_fence_put(in_fence); + goto fail; + } + } + + dma_fence_put(in_fence); + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) -- cgit From e84fcb95e07442edd7ce3b13973523646dbc581a Mon Sep 17 00:00:00 2001 From: Stefan Schake Date: Wed, 25 Apr 2018 00:03:46 +0200 Subject: drm/vc4: Export fence through syncobj Allow specifying a syncobj on render job submission where we store the fence for the job. This gives userland flexible access to the fence. v2: Use 0 as invalid syncobj to drop flag (Eric) Don't reintroduce the padding (Eric) Signed-off-by: Stefan Schake Signed-off-by: Eric Anholt Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/1524607427-12876-3-git-send-email-stschake@gmail.com --- drivers/gpu/drm/vc4/vc4_gem.c | 30 ++++++++++++++++++++++++++++-- include/uapi/drm/vc4_drm.h | 6 ++++++ 2 files changed, 34 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/vc4/vc4_gem.c') diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index e305ccdedf47..a4c4be3ac6af 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -656,7 +656,8 @@ retry: */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) + struct ww_acquire_ctx *acquire_ctx, + struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *renderjob; @@ -679,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, fence->seqno = exec->seqno; exec->fence = &fence->base; + if (out_sync) + drm_syncobj_replace_fence(out_sync, exec->fence); + vc4_update_bo_seqnos(exec, seqno); vc4_unlock_bo_reservations(dev, exec, acquire_ctx); @@ -1114,6 +1118,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; + struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; struct dma_fence *in_fence; @@ -1201,12 +1206,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->out_sync) { + out_sync = drm_syncobj_find(file_priv, args->out_sync); + if (!out_sync) { + ret = -EINVAL; + goto fail; + } + + /* We replace the fence in out_sync in vc4_queue_submit since + * the render job could execute immediately after that call. + * If it finishes before our ioctl processing resumes the + * render job fence could already have been freed. + */ + } + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - ret = vc4_queue_submit(dev, exec, &acquire_ctx); + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + + /* The syncobj isn't part of the exec data and we need to free our + * reference even if job submission failed. + */ + if (out_sync) + drm_syncobj_put(out_sync); + if (ret) goto fail; diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index d97065b86431..2be4fe3610b8 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -187,6 +187,12 @@ struct drm_vc4_submit_cl { * will not start until the syncobj is signaled. 0 means ignore. */ __u32 in_sync; + + /* Syncobj handle to export fence to. If set, the fence in the syncobj + * will be replaced with a fence that signals upon completion of this + * render job. 0 means ignore. + */ + __u32 out_sync; }; /** -- cgit From 4c70ac7639f6af6d7c2d01f0307665a4b9afada7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 30 Apr 2018 16:59:27 -0700 Subject: drm/vc4: Add a pad field to align drm_vc4_submit_cl to 64 bits. I had originally asked Stefan Schake to drop the pad field from the syncobj changes that just landed, because I couldn't come up with a reason to align to 64 bits. Talking with Dave Airlie about the new v3d driver's submit ioctl, we came up with a reason: sizeof() on 64-bit platforms may align to 64 bits, in which case the userspace will be submitting the aligned size and the final 32 bits won't be zero-padded by the kernel. If userspace doesn't zero-fill, then a future ABI change adding a 32-bit field at the end could potentially cause the kernel to read undefined data from old userspace (our userspace happens to use structure initialization that zero-fills, but as a general rule we try not to rely on that in the kernel). Signed-off-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180430235927.28712-1-eric@anholt.net Reviewed-by: Stefan Schake --- drivers/gpu/drm/vc4/vc4_gem.c | 5 +++++ include/uapi/drm/vc4_drm.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'drivers/gpu/drm/vc4/vc4_gem.c') diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index a4c4be3ac6af..7910b9acedd6 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -1132,6 +1132,11 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + if (args->pad2 != 0) { + DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2); + return -EINVAL; + } + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 2be4fe3610b8..2cac6277a1d7 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -193,6 +193,8 @@ struct drm_vc4_submit_cl { * render job. 0 means ignore. */ __u32 out_sync; + + __u32 pad2; }; /** -- cgit