diff options
author | Dave Airlie <airlied@redhat.com> | 2017-11-02 11:29:28 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2017-11-02 11:29:28 +1000 |
commit | 87331c83797b5d5763a82f09f26fbb6e1a7e6661 (patch) | |
tree | 0fbd0da83eeadfd50751bfbeee65a60f8aa9ca5d /drivers/gpu/drm/msm/msm_gpu.c | |
parent | 43106e25ab37580f54df99c512d1d66ae0fe1c67 (diff) | |
parent | 39ae0d3e561d360e41f2a3d1c427d5d9142468da (diff) |
Merge tag 'drm-msm-next-2017-11-01' of git://people.freedesktop.org/~robclark/linux into drm-next
+ preemption support for a5xx[1][2]
+ display fixes for 8x96 (snapdragon 820) including fixes for 4k scanout
(hwpipe assignment re-work to handle multiple hwpipe assigned to plane
for wide scanout)
+ async cursor plane updates and fixes
+ refactor adreno_bind/hwinit.. still defer fw loading until device open,
but move clk/irq/etc to probe/bind time to fix issues when fw isn't
present in filesys
+ clk/dt bindings cleanups w/ backward compat via msm_clk_get() (dt docs
part ack'ed by Rob Herring)
+ fw loading re-work with helper to handle either /lib/firmware/qcom/$fw
or /lib/firmware/$fw.. background, we've started landing fw for some of
generations in linux-firmware, but there is a preference to put fw files
under 'qcom' subdirectory, which is not what was done on android or for
people who copied fw from android. So now we first look in qcom subdir
and then fallback to the original location.
+ bunch of GPU debugging enhancements, to dump full cmdline of processes
that trigger faults, and to add a new debugfs to capture cmdstream of
just submits that triggered faults.. both quite useful for piglit ;-)
* tag 'drm-msm-next-2017-11-01' of git://people.freedesktop.org/~robclark/linux: (38 commits)
drm/msm: use %z format modifier for printing size_t
drm/msm/mdp5: Don't use async plane update path if plane visibility changes
drm/msm/mdp5: mdp5_crtc: Restore cursor state only if LM cursors are enabled
drm/msm/mdp5: Update mdp5_pipe_assign to spit out both planes
drm/msm/mdp5: Prepare mdp5_pipe_assign for some rework
drm/msm: remove mdp5_cursor_plane_funcs
drm/msm: update cursors asynchronously through atomic
drm/msm/atomic: switch to drm_atomic_helper_check
drm/msm/mdp5: restore cursor state when enabling crtc
drm/msm/mdp5: don't use autosuspend
drm/msm/mdp5: ignore planes that are not visible
drm/msm: dump submits which triggered gpu hang
drm/msm: preserve IOVAs in submit's bo table
drm/msm/rd: allow adding addition msg to top of dump
drm/msm: split rd debugfs file
drm/msm: add special _get_vaddr_active() for cmdstream dumps
drm/msm: show task cmdline in gpu recovery messages
drm/msm: dump a rd GPUADDR header for all buffers in the command
drm/msm: Removed unused struct_mutex_task
drm/msm: Implement preemption for A5XX targets
...
Diffstat (limited to 'drivers/gpu/drm/msm/msm_gpu.c')
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 238 |
1 files changed, 179 insertions, 59 deletions
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ffbff27600e0..8d4477818ec2 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -20,6 +20,8 @@ #include "msm_mmu.h" #include "msm_fence.h" +#include <linux/string_helpers.h> + /* * Power Management: @@ -221,33 +223,102 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + +static struct msm_gem_submit * +find_submit(struct msm_ringbuffer *ring, uint32_t fence) +{ + struct msm_gem_submit *submit; + + WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); + + list_for_each_entry(submit, &ring->submits, node) + if (submit->seqno == fence) + return submit; + + return NULL; +} + static void retire_submits(struct msm_gpu *gpu); static void recover_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; struct msm_gem_submit *submit; - uint32_t fence = gpu->funcs->last_fence(gpu); - - msm_update_fence(gpu->fctx, fence + 1); + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + int i; mutex_lock(&dev->struct_mutex); dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->fence->seqno == (fence + 1)) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(submit->pid, PIDTYPE_PID); - if (task) { - dev_err(dev->dev, "%s: offending task: %s\n", - gpu->name, task->comm); - } - rcu_read_unlock(); - break; + + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); + if (submit) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(submit->pid, PIDTYPE_PID); + if (task) { + char *cmd; + + /* + * So slightly annoying, in other paths like + * mmap'ing gem buffers, mmap_sem is acquired + * before struct_mutex, which means we can't + * hold struct_mutex across the call to + * get_cmdline(). But submits are retired + * from the same in-order workqueue, so we can + * safely drop the lock here without worrying + * about the submit going away. + */ + mutex_unlock(&dev->struct_mutex); + cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); + mutex_lock(&dev->struct_mutex); + + dev_err(dev->dev, "%s: offending task: %s (%s)\n", + gpu->name, task->comm, cmd); + + msm_rd_dump_submit(priv->hangrd, submit, + "offending task: %s (%s)", task->comm, cmd); + } else { + msm_rd_dump_submit(priv->hangrd, submit, NULL); } + rcu_read_unlock(); + } + + + /* + * Update all the rings with the latest and greatest fence.. this + * needs to happen after msm_rd_dump_submit() to ensure that the + * bo's referenced by the offending submit are still around. + */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + uint32_t fence = ring->memptrs->fence; + + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence++; + + update_fences(gpu, ring, fence); } if (msm_gpu_active(gpu)) { @@ -258,9 +329,15 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -281,25 +358,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->funcs->last_fence(gpu); + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->fctx->last_fence) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! */ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->fctx->last_fence); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->fctx->last_fence > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -428,19 +507,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; + /* Retire the commits starting with highest priority */ + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); - - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -449,9 +527,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->funcs->last_fence(gpu); + int i; - msm_update_fence(gpu->fctx, fence); + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -472,6 +551,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -480,9 +560,11 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, msm_gpu_hw_init(gpu); - list_add_tail(&submit->node, &gpu->submit_list); + submit->seqno = ++ring->seqno; - msm_rd_dump_submit(submit); + list_add_tail(&submit->node, &ring->submits); + + msm_rd_dump_submit(priv->rd, submit, NULL); update_sw_cntrs(gpu); @@ -605,7 +687,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -613,18 +697,11 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, gpu->dev = drm; gpu->funcs = funcs; gpu->name = name; - gpu->fctx = msm_fence_context_alloc(drm, name); - if (IS_ERR(gpu->fctx)) { - ret = PTR_ERR(gpu->fctx); - gpu->fctx = NULL; - goto fail; - } INIT_LIST_HEAD(&gpu->active_list); INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -689,36 +766,79 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, goto fail; } - /* Create ringbuffer: */ - gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, + &memptrs_iova); + + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); + dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); + } + + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); + } + + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + platform_set_drvdata(pdev, NULL); return ret; } void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; } - if (gpu->aspace) { + + if (gpu->memptrs_bo) { + msm_gem_put_vaddr(gpu->memptrs_bo); + msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); + drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + } + + if (!IS_ERR_OR_NULL(gpu->aspace)) { gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, NULL, 0); msm_gem_address_space_put(gpu->aspace); |