From 7645670decdb677e2f415ff91609d31e5d4777d8 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 6 Apr 2017 17:52:39 +0200 Subject: drm/amdgpu: split VMID management by VMHUB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way GFX and MM won't fight for VMIDs any more. Initially disabled since we need to stop flushing all HUBS at the same time as well. Signed-off-by: Christian König Reviewed-by: Andres Rodriguez Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 483660742f75..724b4c1c80f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1854,7 +1854,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* mutex initialization are all done here so we * can recall function without having locking issues */ - mutex_init(&adev->vm_manager.lock); atomic_set(&adev->irq.ih.lock, 0); mutex_init(&adev->firmware.mutex); mutex_init(&adev->pm.mutex); -- cgit From 03161a6ecb7576492bb37e9bfc68a0330fc6a41d Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 13 Apr 2017 16:12:26 +0800 Subject: drm/amdgpu: fix dead lock if any ip block resume failed in s3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver must free the console lock whether driver resuming successful or not. Otherwise, fb_console will be always waiting for the lock and then cause system stuck. [ 244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds. [ 244.405543] Tainted: G OE 4.9.0-custom #1 [ 244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 244.405541] INFO: task kworker/0:0:4 blocked for more than 120 seconds. [ 244.405543] Tainted: G OE 4.9.0-custom #1 [ 244.405544] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 244.405550] kworker/0:0 D 0 4 2 0x00080000 [ 244.405559] Workqueue: events console_callback [ 244.405564] ffff88045a2cfc00 0000000000000000 ffff880462b75940 ffffffff81c0e500 [ 244.405568] ffff880476419280 ffffc900018f7c90 ffffffff817dcf62 000000000000003c [ 244.405572] 0000000100000000 0000000000000002 ffff880462b75940 ffff880462b75940 [ 244.405573] Call Trace: [ 244.405580] [] ? __schedule+0x222/0x6a0 [ 244.405584] [] schedule+0x36/0x80 [ 244.405588] [] schedule_timeout+0x1fc/0x390 [ 244.405592] [] __down_common+0xa5/0xf8 [ 244.405598] [] ? put_prev_entity+0x48/0x710 [ 244.405601] [] __down+0x1d/0x1f [ 244.405606] [] down+0x41/0x50 [ 244.405611] [] console_lock+0x1a/0x40 [ 244.405614] [] console_callback+0x13/0x160 [ 244.405617] [] ? __schedule+0x22a/0x6a0 [ 244.405623] [] process_one_work+0x153/0x3f0 [ 244.405628] [] worker_thread+0x12b/0x4b0 [ 244.405633] [] ? rescuer_thread+0x350/0x350 [ 244.405637] [] kthread+0xd3/0xf0 [ 244.405641] [] ? kthread_park+0x60/0x60 [ 244.405645] [] ? kthread_park+0x60/0x60 [ 244.405649] [] ret_from_fork+0x25/0x30 Signed-off-by: Huang Rui Reviewed-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 724b4c1c80f6..fbda93a87648 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2215,7 +2215,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) struct drm_connector *connector; struct amdgpu_device *adev = dev->dev_private; struct drm_crtc *crtc; - int r; + int r = 0; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -2227,11 +2227,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) pci_set_power_state(dev->pdev, PCI_D0); pci_restore_state(dev->pdev); r = pci_enable_device(dev->pdev); - if (r) { - if (fbcon) - console_unlock(); - return r; - } + if (r) + goto unlock; } if (adev->is_atom_fw) amdgpu_atomfirmware_scratch_regs_restore(adev); @@ -2248,7 +2245,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) r = amdgpu_resume(adev); if (r) { DRM_ERROR("amdgpu_resume failed (%d).\n", r); - return r; + goto unlock; } amdgpu_fence_driver_resume(adev); @@ -2259,11 +2256,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } r = amdgpu_late_init(adev); - if (r) { - if (fbcon) - console_unlock(); - return r; - } + if (r) + goto unlock; /* pin cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -2313,12 +2307,14 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) dev->dev->power.disable_depth--; #endif - if (fbcon) { + if (fbcon) amdgpu_fbdev_set_suspend(adev, 0); + +unlock: + if (fbcon) console_unlock(); - } - return 0; + return r; } static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) -- cgit From 8972e5d26996e3f04e1cf29b7d7edd3ec5614bf2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 6 Mar 2017 13:34:57 +0100 Subject: drm/amdgpu: fix coding style and printing in amdgpu_doorbell_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on commit "drm/radeon: remove useless and potentially wrong message". The size of the info printing is incorrect and the PCI subsystems prints the same info on boot anyway. Signed-off-by: Christian König Reviewed-by: Edward O'Callaghan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fbda93a87648..8aad6f4a5241 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -422,12 +422,11 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) if (adev->doorbell.num_doorbells == 0) return -EINVAL; - adev->doorbell.ptr = ioremap(adev->doorbell.base, adev->doorbell.num_doorbells * sizeof(u32)); - if (adev->doorbell.ptr == NULL) { + adev->doorbell.ptr = ioremap(adev->doorbell.base, + adev->doorbell.num_doorbells * + sizeof(u32)); + if (adev->doorbell.ptr == NULL) return -ENOMEM; - } - DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)adev->doorbell.base); - DRM_INFO("doorbell mmio size: %u\n", (unsigned)adev->doorbell.size); return 0; } -- cgit From 7ad87b96962a01830b0751d11a389b4039eb4460 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 24 Apr 2017 13:51:52 -0400 Subject: Revert "drm/amd/amdgpu: Set VCE/UVD off during late init" This leads to hangs on init. This reverts commit d1aff8ec49c3ece05cee9b6e63d44e96a420b068. --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8aad6f4a5241..75f851bd5b63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -53,7 +53,6 @@ #include "bif/bif_4_1_d.h" #include #include -#include "amdgpu_pm.h" static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); @@ -1583,9 +1582,6 @@ static int amdgpu_late_init(struct amdgpu_device *adev) } } - amdgpu_dpm_enable_uvd(adev, false); - amdgpu_dpm_enable_vce(adev, false); - return 0; } -- cgit From 23d2e5049c080abda50810d21c8be20b5d65d191 Mon Sep 17 00:00:00 2001 From: "Roger.He" Date: Fri, 21 Apr 2017 14:24:26 +0800 Subject: drm/amdgpu: fix indent Reviewed-by: Alex Deucher Signed-off-by: Roger.He Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 34 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 75f851bd5b63..61716a2d4484 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2420,25 +2420,25 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, uint32_t domain; int r; - if (!bo->shadow) - return 0; - - r = amdgpu_bo_reserve(bo, false); - if (r) - return r; - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - /* if bo has been evicted, then no need to recover */ - if (domain == AMDGPU_GEM_DOMAIN_VRAM) { - r = amdgpu_bo_restore_from_shadow(adev, ring, bo, + if (!bo->shadow) + return 0; + + r = amdgpu_bo_reserve(bo, false); + if (r) + return r; + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + /* if bo has been evicted, then no need to recover */ + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + r = amdgpu_bo_restore_from_shadow(adev, ring, bo, NULL, fence, true); - if (r) { - DRM_ERROR("recover page table failed!\n"); - goto err; - } - } + if (r) { + DRM_ERROR("recover page table failed!\n"); + goto err; + } + } err: - amdgpu_bo_unreserve(bo); - return r; + amdgpu_bo_unreserve(bo); + return r; } /** -- cgit From 51687759be93fbc553f2727e86be25c38126ba93 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 24 Apr 2017 17:09:15 +0800 Subject: drm/amdgpu: fix gpu reset crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ 413.687439] BUG: unable to handle kernel NULL pointer dereference at 0000000000000548 [ 413.687479] IP: [] to_live_kthread+0x5/0x60 [ 413.687507] PGD 1efd12067 [ 413.687519] PUD 1efd11067 [ 413.687531] PMD 0 [ 413.687543] Oops: 0000 [#1] SMP [ 413.687557] Modules linked in: amdgpu(OE) ttm(OE) drm_kms_helper(E) drm(E) i2c_algo_bit(E) fb_sys_fops(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) rpcsec_gss_krb5(E) nfsv4(E) nfs(E) fscache(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) snd_hda_codec_hdmi(E) snd_hda_intel(E) eeepc_wmi(E) snd_hda_codec(E) asus_wmi(E) snd_hda_core(E) sparse_keymap(E) snd_hwdep(E) video(E) snd_pcm(E) snd_seq_midi(E) joydev(E) snd_seq_midi_event(E) snd_rawmidi(E) snd_seq(E) snd_seq_device(E) snd_timer(E) kvm(E) irqbypass(E) crct10dif_pclmul(E) snd(E) crc32_pclmul(E) ghash_clmulni_intel(E) soundcore(E) aesni_intel(E) aes_x86_64(E) lrw(E) gf128mul(E) glue_helper(E) ablk_helper(E) cryptd(E) shpchp(E) serio_raw(E) i2c_piix4(E) 8250_dw(E) i2c_designware_platform(E) i2c_designware_core(E) mac_hid(E) binfmt_misc(E) [ 413.687894] parport_pc(E) ppdev(E) lp(E) parport(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) autofs4(E) hid_generic(E) usbhid(E) hid(E) psmouse(E) ahci(E) r8169(E) mii(E) libahci(E) wmi(E) [ 413.687989] CPU: 13 PID: 1134 Comm: kworker/13:2 Tainted: G OE 4.9.0-custom #4 [ 413.688019] Hardware name: System manufacturer System Product Name/PRIME B350-PLUS, BIOS 0606 04/06/2017 [ 413.688089] Workqueue: events amd_sched_job_timedout [amdgpu] [ 413.688116] task: ffff88020f9657c0 task.stack: ffffc90001a88000 [ 413.688139] RIP: 0010:[] [] to_live_kthread+0x5/0x60 [ 413.688171] RSP: 0018:ffffc90001a8bd60 EFLAGS: 00010282 [ 413.688191] RAX: ffff88020f0073f8 RBX: ffff88020f000000 RCX: 0000000000000000 [ 413.688217] RDX: 0000000000000001 RSI: ffff88020f9670c0 RDI: 0000000000000000 [ 413.688243] RBP: ffffc90001a8bd78 R08: 0000000000000000 R09: 0000000000001000 [ 413.688269] R10: 0000006051b11a82 R11: 0000000000000001 R12: 0000000000000000 [ 413.688295] R13: ffff88020f002770 R14: ffff88020f004838 R15: ffff8801b23c2c60 [ 413.688321] FS: 0000000000000000(0000) GS:ffff88021ef40000(0000) knlGS:0000000000000000 [ 413.688352] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 413.688373] CR2: 0000000000000548 CR3: 00000001efd0f000 CR4: 00000000003406e0 [ 413.688399] Stack: [ 413.688407] ffffffff8109b304 ffff88020f000000 0000000000000070 ffffc90001a8bdf0 [ 413.688439] ffffffffa05ce29d ffffffffa052feb7 ffffffffa07b5820 ffffc90001a8bda0 [ 413.688470] ffffffff00000018 ffff8801bb88f060 0000000001a8bdb8 ffff88021ef59280 [ 413.688502] Call Trace: [ 413.688514] [] ? kthread_park+0x14/0x60 [ 413.688555] [] amdgpu_gpu_reset+0x7d/0x670 [amdgpu] [ 413.688589] [] ? drm_printk+0x97/0xa0 [drm] [ 413.688643] [] amdgpu_job_timedout+0x46/0x50 [amdgpu] [ 413.688700] [] amd_sched_job_timedout+0x17/0x20 [amdgpu] [ 413.688727] [] process_one_work+0x153/0x3f0 [ 413.688751] [] worker_thread+0x12b/0x4b0 [ 413.688773] [] ? do_syscall_64+0x6e/0x180 [ 413.688795] [] ? rescuer_thread+0x350/0x350 [ 413.688818] [] ? do_syscall_64+0x6e/0x180 [ 413.688839] [] kthread+0xd3/0xf0 [ 413.688858] [] ? kthread_park+0x60/0x60 [ 413.688881] [] ret_from_fork+0x25/0x30 [ 413.688901] Code: 25 40 d3 00 00 48 8b 80 48 05 00 00 48 89 e5 5d 48 8b 40 c8 48 c1 e8 02 83 e0 01 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b b7 48 05 00 00 55 48 89 e5 48 85 f6 74 31 8b 97 f8 18 00 [ 413.689045] RIP [] to_live_kthread+0x5/0x60 [ 413.689064] RSP [ 413.689076] CR2: 0000000000000548 [ 413.697985] ---[ end trace 0a314a64821f84e9 ]--- The root cause is some ring doesn't have scheduler, like KIQ ring Reviewed-by: Christian König Signed-off-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 61716a2d4484..eadd6e0a4152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2583,7 +2583,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring) + if (!ring || !ring->sched.thread) continue; kthread_park(ring->sched.thread); amd_sched_hw_job_reset(&ring->sched); @@ -2678,7 +2678,8 @@ retry: } for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring) + + if (!ring || !ring->sched.thread) continue; amd_sched_job_recovery(&ring->sched); @@ -2687,7 +2688,7 @@ retry: } else { dev_err(adev->dev, "asic resume failed (%d).\n", r); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - if (adev->rings[i]) { + if (adev->rings[i] && adev->rings[i]->sched.thread) { kthread_unpark(adev->rings[i]->sched.thread); } } -- cgit From 8ab25b4f51ffd2f859afaffa191d50a1eda4128f Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Mon, 24 Apr 2017 13:30:43 -0400 Subject: drm/amdgpu: Fix use of interruptible waiting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If amdgpu_bo_reserve function is interrupted by signal, amdgpu_bo_kunmap function is not called. Signed-off-by: Alex Xie Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index eadd6e0a4152..ebc0022a7ab4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -349,7 +349,7 @@ static void amdgpu_vram_scratch_fini(struct amdgpu_device *adev) if (adev->vram_scratch.robj == NULL) { return; } - r = amdgpu_bo_reserve(adev->vram_scratch.robj, false); + r = amdgpu_bo_reserve(adev->vram_scratch.robj, true); if (likely(r == 0)) { amdgpu_bo_kunmap(adev->vram_scratch.robj); amdgpu_bo_unpin(adev->vram_scratch.robj); -- cgit From 7a6901d7d7e49ad50d477cd8f8ef79d079b5c6c5 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Mon, 24 Apr 2017 13:52:41 -0400 Subject: drm/amdgpu: Fix use of interruptible waiting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. The signal interrupt can affect the expected behaviour. 2. There is no mechanism to handle the corresponding error. Signed-off-by: Alex Xie Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ebc0022a7ab4..3f01c4b92280 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2140,7 +2140,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) if (amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - r = amdgpu_bo_reserve(aobj, false); + r = amdgpu_bo_reserve(aobj, true); if (r == 0) { amdgpu_bo_unpin(aobj); amdgpu_bo_unreserve(aobj); @@ -2153,7 +2153,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) robj = gem_to_amdgpu_bo(rfb->obj); /* don't unpin kernel fb objects */ if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { - r = amdgpu_bo_reserve(robj, false); + r = amdgpu_bo_reserve(robj, true); if (r == 0) { amdgpu_bo_unpin(robj); amdgpu_bo_unreserve(robj); @@ -2260,7 +2260,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) if (amdgpu_crtc->cursor_bo) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); - r = amdgpu_bo_reserve(aobj, false); + r = amdgpu_bo_reserve(aobj, true); if (r == 0) { r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, -- cgit From 1d28479776fe932c32bda2366501fb2408557397 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Mon, 24 Apr 2017 13:53:04 -0400 Subject: drm/amdgpu: Fix use of interruptible waiting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. The signal interrupt can affect the expected behaviour. 2. There is no good mechanism to handle the corresponding error. Signed-off-by: Alex Xie Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3f01c4b92280..234d90a12482 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2423,7 +2423,7 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, if (!bo->shadow) return 0; - r = amdgpu_bo_reserve(bo, false); + r = amdgpu_bo_reserve(bo, true); if (r) return r; domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); -- cgit From 82521316395a37a2ec1d7e396c055f3c7de9d93b Mon Sep 17 00:00:00 2001 From: "Roger.He" Date: Fri, 21 Apr 2017 13:08:43 +0800 Subject: drm/amdgpu: validate shadow before restoring from it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Roger.He Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + 3 files changed, 34 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 234d90a12482..3c1754df4c40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2429,6 +2429,18 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); /* if bo has been evicted, then no need to recover */ if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + r = amdgpu_bo_validate(bo->shadow); + if (r) { + DRM_ERROR("bo validate failed!\n"); + goto err; + } + + r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem); + if (r) { + DRM_ERROR("%p bind failed\n", bo->shadow); + goto err; + } + r = amdgpu_bo_restore_from_shadow(adev, ring, bo, NULL, fence, true); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 60de77b153b7..365883d7948d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -543,6 +543,27 @@ err: return r; } +int amdgpu_bo_validate(struct amdgpu_bo *bo) +{ + uint32_t domain; + int r; + + if (bo->pin_count) + return 0; + + domain = bo->prefered_domains; + +retry: + amdgpu_ttm_placement_from_domain(bo, domain); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { + domain = bo->allowed_domains; + goto retry; + } + + return r; +} + int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 15a723adca76..382485115b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -175,6 +175,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, struct amdgpu_bo *bo, struct reservation_object *resv, struct dma_fence **fence, bool direct); +int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, -- cgit From 236763d340d6190c56554caee61c2bd5cfdf5217 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Mon, 1 May 2017 16:15:31 +0800 Subject: drm/amdgpu:fix waiting on dirty fence if bo->shadow is NULL (race issue:BO shadow was just released and gpu-reset kick in but BO hasn't yet) recover_vram_from_shadow won't set @next, so the following "fence=next" will wrongly use a fence pointer which may already dirty. fixing it by set next to NULL prior to recover_vram_from_shadow Signed-off-by: Monk Liu Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3c1754df4c40..367811cd0763 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2522,6 +2522,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) ring = adev->mman.buffer_funcs_ring; mutex_lock(&adev->shadow_list_lock); list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { + next = NULL; amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); if (fence) { r = dma_fence_wait(fence, false); @@ -2668,6 +2669,7 @@ retry: DRM_INFO("recover vram bo from shadow\n"); mutex_lock(&adev->shadow_list_lock); list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { + next = NULL; amdgpu_recover_vram_from_shadow(adev, ring, bo, &next); if (fence) { r = dma_fence_wait(fence, false); -- cgit From db2c2a9798d266fd1d7c950b3937793b05533bf4 Mon Sep 17 00:00:00 2001 From: Pixel Ding Date: Tue, 25 Apr 2017 16:47:42 +0800 Subject: drm/amdgpu: fix mutex list null pointer reference Fix NULL pointer reference. Signed-off-by: Pixel Ding Signed-off-by: Xiangliang Yu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 367811cd0763..43ca16b6eee2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2065,7 +2065,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: finishing device.\n"); adev->shutdown = true; - drm_crtc_force_disable_all(adev->ddev); + if (adev->mode_info.mode_config_initialized) + drm_crtc_force_disable_all(adev->ddev); /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); -- cgit