diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-11-20 09:50:08 +0100 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-11-20 09:50:09 +0100 |
commit | c79b972eb88b077d2765e7790d0902b3dc94d55c (patch) | |
tree | 1ba476c2339679189ed5fdd8b7afe9bb5792605a /drivers/gpu/drm/v3d/v3d_sched.c | |
parent | 98b1cc82c4affc16f5598d4fa14b1858671b2263 (diff) | |
parent | 3b434a3445fff3149128db0169da864d67057325 (diff) |
Merge tag 'drm-misc-next-2023-11-17' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
drm-misc-next for 6.8:
UAPI Changes:
- drm: Introduce CLOSE_FB ioctl
- drm/dp-mst: Documentation for the PATH property
- fdinfo: Do not align to a MB if the size is larger than 1MiB
- virtio-gpu: add explicit virtgpu context debug name
Cross-subsystem Changes:
- dma-buf: Add dma_fence_timestamp helper
Core Changes:
- client: Do not acquire module reference
- edid: split out drm_eld, add SAD helpers
- format-helper: Cache format conversion buffers
- sched: Move from a kthread to a workqueue, rename some internal
functions to make it clearer, implement dynamic job-flow control
- gpuvm: Provide more features to handle GEM objects
- tests: Remove slow kunit tests
Driver Changes:
- ivpu: Update FW API, new debugfs file, a new NOP job submission test
mode, improve suspend/resume, PM improvements, MMU PT optimizations,
firmware profiling frequency support, support for uncached buffers,
switch to gem shmem helpers, replace kthread with threaded
interrupts
- panfrost: PM improvements
- qaic: Allow to run with a single MSI, support host/device time
synchronization, misc improvements
- simplefb: Support memory-regions, support power-domains
- ssd130x: Unitialized variable fixes
- omapdrm: dma-fence lockdep annotation fix
- tidss: dma-fence lockdep annotation fix
- v3d: Support BCM2712 (RaspberryPi5), Support fdinfo and gputop
- panel:
- edp: Support AUO B116XTN02, BOE NT116WHM-N21,836X2, NV116WHM-N49
V8.0, plus a whole bunch of panels used on Mediatek chromebooks.
Note that the one missing s-o-b for 0da611a87021 ("dma-buf: add
dma_fence_timestamp helper") has been supplied here, and rebasing the
entire tree with upsetting committers didn't seem worth the trouble:
https://lore.kernel.org/dri-devel/ce94020e-a7d4-4799-b87d-fbea7b14a268@gmail.com/
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Maxime Ripard <mripard@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/y4awn5vcfy2lr2hpauo7rc4nfpnc6kksr7btmnwaz7zk63pwoi@gwwef5iqpzva
Diffstat (limited to 'drivers/gpu/drm/v3d/v3d_sched.c')
-rw-r--r-- | drivers/gpu/drm/v3d/v3d_sched.c | 81 |
1 files changed, 59 insertions, 22 deletions
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 038e1ae589c7..fccbea2a5f2e 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -18,6 +18,7 @@ * semaphores to interlock between them. */ +#include <linux/sched/clock.h> #include <linux/kthread.h> #include "v3d_drv.h" @@ -76,6 +77,7 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) { struct v3d_bin_job *job = to_bin_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; unsigned long irqflags; @@ -107,6 +109,9 @@ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno, job->start, job->end); + file->start_ns[V3D_BIN] = local_clock(); + v3d->queue[V3D_BIN].start_ns = file->start_ns[V3D_BIN]; + v3d_switch_perfmon(v3d, &job->base); /* Set the current and end address of the control list. @@ -131,6 +136,7 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) { struct v3d_render_job *job = to_render_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; @@ -158,6 +164,9 @@ static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno, job->start, job->end); + file->start_ns[V3D_RENDER] = local_clock(); + v3d->queue[V3D_RENDER].start_ns = file->start_ns[V3D_RENDER]; + v3d_switch_perfmon(v3d, &job->base); /* XXX: Set the QCFG */ @@ -176,6 +185,7 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) { struct v3d_tfu_job *job = to_tfu_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; @@ -190,20 +200,25 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno); - V3D_WRITE(V3D_TFU_IIA, job->args.iia); - V3D_WRITE(V3D_TFU_IIS, job->args.iis); - V3D_WRITE(V3D_TFU_ICA, job->args.ica); - V3D_WRITE(V3D_TFU_IUA, job->args.iua); - V3D_WRITE(V3D_TFU_IOA, job->args.ioa); - V3D_WRITE(V3D_TFU_IOS, job->args.ios); - V3D_WRITE(V3D_TFU_COEF0, job->args.coef[0]); - if (job->args.coef[0] & V3D_TFU_COEF0_USECOEF) { - V3D_WRITE(V3D_TFU_COEF1, job->args.coef[1]); - V3D_WRITE(V3D_TFU_COEF2, job->args.coef[2]); - V3D_WRITE(V3D_TFU_COEF3, job->args.coef[3]); + file->start_ns[V3D_TFU] = local_clock(); + v3d->queue[V3D_TFU].start_ns = file->start_ns[V3D_TFU]; + + V3D_WRITE(V3D_TFU_IIA(v3d->ver), job->args.iia); + V3D_WRITE(V3D_TFU_IIS(v3d->ver), job->args.iis); + V3D_WRITE(V3D_TFU_ICA(v3d->ver), job->args.ica); + V3D_WRITE(V3D_TFU_IUA(v3d->ver), job->args.iua); + V3D_WRITE(V3D_TFU_IOA(v3d->ver), job->args.ioa); + if (v3d->ver >= 71) + V3D_WRITE(V3D_V7_TFU_IOC, job->args.v71.ioc); + V3D_WRITE(V3D_TFU_IOS(v3d->ver), job->args.ios); + V3D_WRITE(V3D_TFU_COEF0(v3d->ver), job->args.coef[0]); + if (v3d->ver >= 71 || (job->args.coef[0] & V3D_TFU_COEF0_USECOEF)) { + V3D_WRITE(V3D_TFU_COEF1(v3d->ver), job->args.coef[1]); + V3D_WRITE(V3D_TFU_COEF2(v3d->ver), job->args.coef[2]); + V3D_WRITE(V3D_TFU_COEF3(v3d->ver), job->args.coef[3]); } /* ICFG kicks off the job. */ - V3D_WRITE(V3D_TFU_ICFG, job->args.icfg | V3D_TFU_ICFG_IOC); + V3D_WRITE(V3D_TFU_ICFG(v3d->ver), job->args.icfg | V3D_TFU_ICFG_IOC); return fence; } @@ -213,9 +228,10 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) { struct v3d_csd_job *job = to_csd_job(sched_job); struct v3d_dev *v3d = job->base.v3d; + struct v3d_file_priv *file = job->base.file->driver_priv; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i; + int i, csd_cfg0_reg, csd_cfg_reg_count; v3d->csd_job = job; @@ -231,12 +247,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno); + file->start_ns[V3D_CSD] = local_clock(); + v3d->queue[V3D_CSD].start_ns = file->start_ns[V3D_CSD]; + v3d_switch_perfmon(v3d, &job->base); - for (i = 1; i <= 6; i++) - V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]); + csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); + csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; + for (i = 1; i <= csd_cfg_reg_count; i++) + V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); /* CFG0 write kicks off the job. */ - V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]); + V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); return fence; } @@ -246,9 +267,25 @@ v3d_cache_clean_job_run(struct drm_sched_job *sched_job) { struct v3d_job *job = to_v3d_job(sched_job); struct v3d_dev *v3d = job->v3d; + struct v3d_file_priv *file = job->file->driver_priv; + u64 runtime; + + file->start_ns[V3D_CACHE_CLEAN] = local_clock(); + v3d->queue[V3D_CACHE_CLEAN].start_ns = file->start_ns[V3D_CACHE_CLEAN]; v3d_clean_caches(v3d); + runtime = local_clock() - file->start_ns[V3D_CACHE_CLEAN]; + + file->enabled_ns[V3D_CACHE_CLEAN] += runtime; + v3d->queue[V3D_CACHE_CLEAN].enabled_ns += runtime; + + file->jobs_sent[V3D_CACHE_CLEAN]++; + v3d->queue[V3D_CACHE_CLEAN].jobs_sent++; + + file->start_ns[V3D_CACHE_CLEAN] = 0; + v3d->queue[V3D_CACHE_CLEAN].start_ns = 0; + return NULL; } @@ -336,7 +373,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) { struct v3d_csd_job *job = to_csd_job(sched_job); struct v3d_dev *v3d = job->base.v3d; - u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4); + u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); /* If we've made progress, skip reset and let the timer get * rearmed. @@ -388,7 +425,7 @@ v3d_sched_init(struct v3d_dev *v3d) int ret; ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, - &v3d_bin_sched_ops, + &v3d_bin_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -397,7 +434,7 @@ v3d_sched_init(struct v3d_dev *v3d) return ret; ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, - &v3d_render_sched_ops, + &v3d_render_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -406,7 +443,7 @@ v3d_sched_init(struct v3d_dev *v3d) goto fail; ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, - &v3d_tfu_sched_ops, + &v3d_tfu_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -416,7 +453,7 @@ v3d_sched_init(struct v3d_dev *v3d) if (v3d_has_csd(v3d)) { ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, - &v3d_csd_sched_ops, + &v3d_csd_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, @@ -425,7 +462,7 @@ v3d_sched_init(struct v3d_dev *v3d) goto fail; ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, - &v3d_cache_clean_sched_ops, + &v3d_cache_clean_sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT, hw_jobs_limit, job_hang_limit, msecs_to_jiffies(hang_limit_ms), NULL, |