aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2017-09-03 17:02:26 -0700
committerLinus Torvalds <[email protected]>2017-09-03 17:02:26 -0700
commit906dde0f355bd97c080c215811ae7db1137c4af8 (patch)
tree1e1b4ba58a59c4027f06d86e7430566ee0dcbb15 /drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
parent69c0067aa3f40d3e52ab78643aecb17d669d3389 (diff)
parent7846b12fe0b5feab5446d892f41b5140c1419109 (diff)
Merge tag 'drm-for-v4.14' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This is the main drm pull request for 4.14 merge window. I'm sending this early, as my continuing journey into fatherhood is occurring really soon now, I'm going to be mostly useless for the next couple of weeks, though I may be able to read email, I doubt I'll be doing much patch applications or git sending. If anything urgent pops up I've asked Daniel/Jani/Alex/Sean to try and direct stuff towards you. Outside drm changes: Some rcar-du updates that touch the V4L tree, all acks should be in place. It adds one export to the radix tree code for new i915 use case. There are some minor AGP cleanups (don't see that too often). Changes to the vbox driver in staging to avoid breaking compilation. Summary: core: - Atomic helper fixes - Atomic UAPI fixes - Add YCBCR 4:2:0 support - Drop set_busid hook - Refactor fb_helper locking - Remove a bunch of internal APIs - Add a bunch of better default handlers - Format modifier/blob plane property added - More internal header refactoring - Make more internal API names consistent - Enhanced syncobj APIs (wait/signal/reset/create signalled) bridge: - Add Synopsys Designware MIPI DSI host bridge driver tiny: - Add Pervasive Displays RePaper displays - Add support for LEGO MINDSTORMS EV3 LCD i915: - Lots of GEN10/CNL support patches - drm syncobj support - Skylake+ watermark refactoring - GVT vGPU 48-bit ppgtt support - GVT performance improvements - NOA change ioctl - CCS (color compression) scanout support - GPU reset improvements amdgpu: - Initial hugepage support - BO migration logic rework - Vega10 improvements - Powerplay fixes - Stop reprogramming the MC - Fixes for ACP audio on stoney - SR-IOV fixes/improvements - Command submission overhead improvements amdkfd: - Non-dGPU upstreaming patches - Scratch VA ioctl - Image tiling modes - Update PM4 headers for new firmware - Drop all BUG_ONs. nouveau: - GP108 modesetting support. - Disable MSI on big endian. vmwgfx: - Add fence fd support. msm: - Runtime PM improvements exynos: - NV12MT support - Refactor KMS drivers imx-drm: - Lock scanout channel to improve memory bw - Cleanups etnaviv: - GEM object population fixes tegra: - Prep work for Tegra186 support - PRIME mmap support sunxi: - HDMI support improvements - HDMI CEC support omapdrm: - HDMI hotplug IRQ support - Big driver cleanup - OMAP5 DSI support rcar-du: - vblank fixes - VSP1 updates arcgpu: - Minor fixes stm: - Add STM32 DSI controller driver dw_hdmi: - Add support for Rockchip RK3399 - HDMI CEC support atmel-hlcdc: - Add 8-bit color support vc4: - Atomic fixes - New ioctl to attach a label to a buffer object - HDMI CEC support - Allow userspace to dictate rendering order on submit ioctl" * tag 'drm-for-v4.14' of git://people.freedesktop.org/~airlied/linux: (1074 commits) drm/syncobj: Add a signal ioctl (v3) drm/syncobj: Add a reset ioctl (v3) drm/syncobj: Add a syncobj_array_find helper drm/syncobj: Allow wait for submit and signal behavior (v5) drm/syncobj: Add a CREATE_SIGNALED flag drm/syncobj: Add a callback mechanism for replace_fence (v3) drm/syncobj: add sync obj wait interface. (v8) i915: Use drm_syncobj_fence_get drm/syncobj: Add a race-free drm_syncobj_fence_get helper (v2) drm/syncobj: Rename fence_get to find_fence drm: kirin: Add mode_valid logic to avoid mode clocks we can't generate drm/vmwgfx: Bump the version for fence FD support drm/vmwgfx: Add export fence to file descriptor support drm/vmwgfx: Add support for imported Fence File Descriptor drm/vmwgfx: Prepare to support fence fd drm/vmwgfx: Fix incorrect command header offset at restart drm/vmwgfx: Support the NOP_ERROR command drm/vmwgfx: Restart command buffers after errors drm/vmwgfx: Move irq bottom half processing to threads drm/vmwgfx: Don't use drm_irq_[un]install ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_object.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c234
1 files changed, 121 insertions, 113 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8ee69652be8c..e7e899190bef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -37,55 +37,6 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
-
-
-static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
- struct ttm_mem_reg *mem)
-{
- if (mem->start << PAGE_SHIFT >= adev->mc.visible_vram_size)
- return 0;
-
- return ((mem->start << PAGE_SHIFT) + mem->size) >
- adev->mc.visible_vram_size ?
- adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) :
- mem->size;
-}
-
-static void amdgpu_update_memory_usage(struct amdgpu_device *adev,
- struct ttm_mem_reg *old_mem,
- struct ttm_mem_reg *new_mem)
-{
- u64 vis_size;
- if (!adev)
- return;
-
- if (new_mem) {
- switch (new_mem->mem_type) {
- case TTM_PL_TT:
- atomic64_add(new_mem->size, &adev->gtt_usage);
- break;
- case TTM_PL_VRAM:
- atomic64_add(new_mem->size, &adev->vram_usage);
- vis_size = amdgpu_get_vis_part_size(adev, new_mem);
- atomic64_add(vis_size, &adev->vram_vis_usage);
- break;
- }
- }
-
- if (old_mem) {
- switch (old_mem->mem_type) {
- case TTM_PL_TT:
- atomic64_sub(old_mem->size, &adev->gtt_usage);
- break;
- case TTM_PL_VRAM:
- atomic64_sub(old_mem->size, &adev->vram_usage);
- vis_size = amdgpu_get_vis_part_size(adev, old_mem);
- atomic64_sub(vis_size, &adev->vram_vis_usage);
- break;
- }
- }
-}
-
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
@@ -93,7 +44,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
bo = container_of(tbo, struct amdgpu_bo, tbo);
- amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
+ amdgpu_bo_kunmap(bo);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
@@ -219,7 +170,7 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
}
/**
- * amdgpu_bo_create_kernel - create BO for kernel use
+ * amdgpu_bo_create_reserved - create reserved BO for kernel use
*
* @adev: amdgpu device object
* @size: size for the new BO
@@ -229,24 +180,30 @@ static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo,
* @gpu_addr: GPU addr of the pinned BO
* @cpu_addr: optional CPU address mapping
*
- * Allocates and pins a BO for kernel internal use.
+ * Allocates and pins a BO for kernel internal use, and returns it still
+ * reserved.
*
* Returns 0 on success, negative error code otherwise.
*/
-int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
- unsigned long size, int align,
- u32 domain, struct amdgpu_bo **bo_ptr,
- u64 *gpu_addr, void **cpu_addr)
+int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr)
{
+ bool free = false;
int r;
- r = amdgpu_bo_create(adev, size, align, true, domain,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, bo_ptr);
- if (r) {
- dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r);
- return r;
+ if (!*bo_ptr) {
+ r = amdgpu_bo_create(adev, size, align, true, domain,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+ NULL, NULL, 0, bo_ptr);
+ if (r) {
+ dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
+ r);
+ return r;
+ }
+ free = true;
}
r = amdgpu_bo_reserve(*bo_ptr, false);
@@ -269,20 +226,52 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
}
}
- amdgpu_bo_unreserve(*bo_ptr);
-
return 0;
error_unreserve:
amdgpu_bo_unreserve(*bo_ptr);
error_free:
- amdgpu_bo_unref(bo_ptr);
+ if (free)
+ amdgpu_bo_unref(bo_ptr);
return r;
}
/**
+ * amdgpu_bo_create_kernel - create BO for kernel use
+ *
+ * @adev: amdgpu device object
+ * @size: size for the new BO
+ * @align: alignment for the new BO
+ * @domain: where to place it
+ * @bo_ptr: resulting BO
+ * @gpu_addr: GPU addr of the pinned BO
+ * @cpu_addr: optional CPU address mapping
+ *
+ * Allocates and pins a BO for kernel internal use.
+ *
+ * Returns 0 on success, negative error code otherwise.
+ */
+int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
+ unsigned long size, int align,
+ u32 domain, struct amdgpu_bo **bo_ptr,
+ u64 *gpu_addr, void **cpu_addr)
+{
+ int r;
+
+ r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr,
+ gpu_addr, cpu_addr);
+
+ if (r)
+ return r;
+
+ amdgpu_bo_unreserve(*bo_ptr);
+
+ return 0;
+}
+
+/**
* amdgpu_bo_free_kernel - free BO for kernel use
*
* @bo: amdgpu BO to free
@@ -317,12 +306,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
struct sg_table *sg,
struct ttm_placement *placement,
struct reservation_object *resv,
+ uint64_t init_value,
struct amdgpu_bo **bo_ptr)
{
struct amdgpu_bo *bo;
enum ttm_bo_type type;
unsigned long page_align;
- u64 initial_bytes_moved;
+ u64 initial_bytes_moved, bytes_moved;
size_t acc_size;
int r;
@@ -351,13 +341,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
}
INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va);
- bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
+ bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT |
AMDGPU_GEM_DOMAIN_CPU |
AMDGPU_GEM_DOMAIN_GDS |
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA);
- bo->allowed_domains = bo->prefered_domains;
+ bo->allowed_domains = bo->preferred_domains;
if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
@@ -398,8 +388,14 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
&bo->placement, page_align, !kernel, NULL,
acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
- amdgpu_cs_report_moved_bytes(adev,
- atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+ bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+ initial_bytes_moved;
+ if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+ bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+ amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+ else
+ amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
if (unlikely(r != 0))
return r;
@@ -411,7 +407,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
+ r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
@@ -426,6 +422,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
trace_amdgpu_bo_create(bo);
+ /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
+ if (type == ttm_bo_type_device)
+ bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
return 0;
fail_unreserve:
@@ -459,6 +459,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
AMDGPU_GEM_CREATE_CPU_GTT_USWC,
NULL, &placement,
bo->tbo.resv,
+ 0,
&bo->shadow);
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
@@ -470,11 +471,15 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
return r;
}
+/* init_value will only take effect when flags contains
+ * AMDGPU_GEM_CREATE_VRAM_CLEARED.
+ */
int amdgpu_bo_create(struct amdgpu_device *adev,
unsigned long size, int byte_align,
bool kernel, u32 domain, u64 flags,
struct sg_table *sg,
struct reservation_object *resv,
+ uint64_t init_value,
struct amdgpu_bo **bo_ptr)
{
struct ttm_placement placement = {0};
@@ -489,7 +494,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
domain, flags, sg, &placement,
- resv, bo_ptr);
+ resv, init_value, bo_ptr);
if (r)
return r;
@@ -535,7 +540,7 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
amdgpu_bo_size(bo), resv, fence,
- direct);
+ direct, false);
if (!r)
amdgpu_bo_fence(bo, *fence, true);
@@ -551,7 +556,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
if (bo->pin_count)
return 0;
- domain = bo->prefered_domains;
+ domain = bo->preferred_domains;
retry:
amdgpu_ttm_placement_from_domain(bo, domain);
@@ -588,7 +593,7 @@ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
amdgpu_bo_size(bo), resv, fence,
- direct);
+ direct, false);
if (!r)
amdgpu_bo_fence(bo, *fence, true);
@@ -598,16 +603,16 @@ err:
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
- bool is_iomem;
+ void *kptr;
long r;
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
return -EPERM;
- if (bo->kptr) {
- if (ptr) {
- *ptr = bo->kptr;
- }
+ kptr = amdgpu_bo_kptr(bo);
+ if (kptr) {
+ if (ptr)
+ *ptr = kptr;
return 0;
}
@@ -620,19 +625,23 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
if (r)
return r;
- bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
if (ptr)
- *ptr = bo->kptr;
+ *ptr = amdgpu_bo_kptr(bo);
return 0;
}
+void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
+{
+ bool is_iomem;
+
+ return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+}
+
void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
- if (bo->kptr == NULL)
- return;
- bo->kptr = NULL;
- ttm_bo_kunmap(&bo->kmap);
+ if (bo->kmap.bo)
+ ttm_bo_kunmap(&bo->kmap);
}
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
@@ -724,15 +733,16 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
dev_err(adev->dev, "%p pin failed\n", bo);
goto error;
}
- r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
- if (unlikely(r)) {
- dev_err(adev->dev, "%p bind failed\n", bo);
- goto error;
- }
bo->pin_count = 1;
- if (gpu_addr != NULL)
+ if (gpu_addr != NULL) {
+ r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
+ if (unlikely(r)) {
+ dev_err(adev->dev, "%p bind failed\n", bo);
+ goto error;
+ }
*gpu_addr = amdgpu_bo_gpu_offset(bo);
+ }
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
adev->vram_pin_size += amdgpu_bo_size(bo);
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
@@ -921,6 +931,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
abo = container_of(bo, struct amdgpu_bo, tbo);
amdgpu_vm_bo_invalidate(adev, abo);
+ amdgpu_bo_kunmap(abo);
+
/* remember the eviction */
if (evict)
atomic64_inc(&adev->num_evictions);
@@ -930,8 +942,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
return;
/* move_notify is called before move happens */
- amdgpu_update_memory_usage(adev, &bo->mem, new_mem);
-
trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}
@@ -939,19 +949,22 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_bo *abo;
- unsigned long offset, size, lpfn;
- int i, r;
+ unsigned long offset, size;
+ int r;
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
return 0;
abo = container_of(bo, struct amdgpu_bo, tbo);
+
+ /* Remember that this BO was accessed by the CPU */
+ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
if (bo->mem.mem_type != TTM_PL_VRAM)
return 0;
size = bo->mem.num_pages << PAGE_SHIFT;
offset = bo->mem.start << PAGE_SHIFT;
- /* TODO: figure out how to map scattered VRAM to the CPU */
if ((offset + size) <= adev->mc.visible_vram_size)
return 0;
@@ -961,26 +974,21 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* hurrah the memory is not visible ! */
atomic64_inc(&adev->num_vram_cpu_page_faults);
- amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
- lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
- for (i = 0; i < abo->placement.num_placement; i++) {
- /* Force into visible VRAM */
- if ((abo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
- (!abo->placements[i].lpfn ||
- abo->placements[i].lpfn > lpfn))
- abo->placements[i].lpfn = lpfn;
- }
+ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT);
+
+ /* Avoid costly evictions; only set GTT as a busy placement */
+ abo->placement.num_busy_placement = 1;
+ abo->placement.busy_placement = &abo->placements[1];
+
r = ttm_bo_validate(bo, &abo->placement, false, false);
- if (unlikely(r == -ENOMEM)) {
- amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
- return ttm_bo_validate(bo, &abo->placement, false, false);
- } else if (unlikely(r != 0)) {
+ if (unlikely(r != 0))
return r;
- }
offset = bo->mem.start << PAGE_SHIFT;
/* this should never happen */
- if ((offset + size) > adev->mc.visible_vram_size)
+ if (bo->mem.mem_type == TTM_PL_VRAM &&
+ (offset + size) > adev->mc.visible_vram_size)
return -EINVAL;
return 0;