aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2020-12-14 11:07:56 -0800
committerLinus Torvalds <[email protected]>2020-12-14 11:07:56 -0800
commit1d36dffa5d887715dacca0f717f4519b7be5e498 (patch)
treea68f7c00dbb3036a67806ed6c6b8cc61c3cff60d /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent2c85ebc57b3e1817b6ce1a6b703928e113a90442 (diff)
parentb10733527bfd864605c33ab2e9a886eec317ec39 (diff)
Merge tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Not a huge amount of big things here, AMD has support for a few new HW variants (vangogh, green sardine, dimgrey cavefish), Intel has some more DG1 enablement. We have a few big reworks of the TTM layers and interfaces, GEM and atomic internal API reworks cross tree. fbdev is marked orphaned in here as well to reflect the current reality. core: - documentation updates - deprecate DRM_FORMAT_MOD_NONE - atomic crtc enable/disable rework - GEM convert drivers to gem object functions - remove SCATTER_LIST_MAX_SEGMENT sched: - avoid infinite waits ttm: - remove AGP support - don't modify caching for swapout - ttm pinning rework - major TTM reworks - new backend allocator - multihop support vram-helper: - top down BO placement fix - TTM changes - GEM object support displayport: - DP 2.0 DPCD prep work - DP MST extended DPCD caps fbdev: - mark as orphaned amdgpu: - Initial Vangogh support - Green Sardine support - Dimgrey Cavefish support - SG display support for renoir - SMU7 improvements - gfx9+ modiifier support - CI BACO fixes radeon: - expose voltage via hwmon on SUMO amdkfd: - fix unique id handling i915: - more DG1 enablement - bigjoiner support - integer scaling filter support - async flip support - ICL+ DSI command mode - Improve display shutdown - Display refactoring - eLLC machine fbdev loading fix - dma scatterlist fixes - TGL hang fixes - eLLC display buffer caching on SKL+ - MOCS PTE seeting for gen9+ msm: - Shutdown hook - GPU cooling device support - DSI 7nm and 10nm phy/pll updates - sm8150/sm2850 DPU support - GEM locking re-work - LLCC system cache support aspeed: - sysfs output config support ast: - LUT fix - new display mode gma500: - remove 2d framebuffer accel panfrost: - move gpu reset to a worker exynos: - new HDMI mode support mediatek: - MT8167 support - yaml bindings - MIPI DSI phy code moved etnaviv: - new perf counter - more lockdep annotation hibmc: - i2c DDC support ingenic: - pixel clock reset fix - reserved memory support - allow both DMA channels at once - different pixel format support - 30/24/8-bit palette modes tilcdc: - don't keep vblank irq enabled vc4: - new maintainer added - DSI registration fix virtio: - blob resource support - host visible and cross-device support - uuid api support" * tag 'drm-next-2020-12-11' of git://anongit.freedesktop.org/drm/drm: (1754 commits) drm/amdgpu: Initialise drm_gem_object_funcs for imported BOs drm/amdgpu: fix size calculation with stolen vga memory drm/amdgpu: remove amdgpu_ttm_late_init and amdgpu_bo_late_init drm/amdgpu: free the pre-OS console framebuffer after the first modeset drm/amdgpu: enable runtime pm using BACO on CI dGPUs drm/amdgpu/cik: enable BACO reset on Bonaire drm/amd/pm: update smu10.h WORKLOAD_PPLIB setting for raven drm/amd/pm: remove one unsupported smu function for vangogh drm/amd/display: setup system context for APUs drm/amd/display: add S/G support for Vangogh drm/amdkfd: Fix leak in dmabuf import drm/amdgpu: use AMDGPU_NUM_VMID when possible drm/amdgpu: fix sdma instance fw version and feature version init drm/amd/pm: update driver if version for dimgrey_cavefish drm/amd/display: 3.2.115 drm/amd/display: [FW Promotion] Release 0.0.45 drm/amd/display: Revert DCN2.1 dram_clock_change_latency update drm/amd/display: Enable gpu_vm_support for dcn3.01 drm/amd/display: Fixed the audio noise during mode switching with HDCP mode on drm/amd/display: Add wm table for Renoir ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c293
1 files changed, 181 insertions, 112 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index df110afa97bf..0768c8686983 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -300,7 +300,7 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
spin_lock(&vm_bo->vm->invalidated_lock);
- list_del_init(&vm_bo->vm_status);
+ list_move(&vm_bo->vm_status, &vm_bo->vm->done);
spin_unlock(&vm_bo->vm->invalidated_lock);
}
@@ -609,7 +609,7 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
- if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)
+ if (bo->pin_count)
return;
abo = ttm_to_amdgpu_bo(bo);
@@ -1570,7 +1570,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
/**
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
*
- * @adev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer of the VM
+ * @bo_adev: amdgpu_device pointer of the mapped BO
* @vm: requested vm
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
@@ -1578,7 +1579,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
- * @addr: addr to set the area to
+ * @offset: offset into nodes and pages_addr
+ * @nodes: array of drm_mm_nodes with the MC addresses
* @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
*
@@ -1588,15 +1590,18 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev,
struct amdgpu_vm *vm, bool immediate,
bool unlocked, struct dma_resv *resv,
uint64_t start, uint64_t last,
- uint64_t flags, uint64_t addr,
+ uint64_t flags, uint64_t offset,
+ struct drm_mm_node *nodes,
dma_addr_t *pages_addr,
struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
+ uint64_t pfn;
int r;
memset(&params, 0, sizeof(params));
@@ -1614,6 +1619,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
+ pfn = offset >> PAGE_SHIFT;
+ if (nodes) {
+ while (pfn >= nodes->size) {
+ pfn -= nodes->size;
+ ++nodes;
+ }
+ }
+
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
@@ -1632,105 +1645,47 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_unlock;
- r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
- if (r)
- goto error_unlock;
-
- r = vm->update_funcs->commit(&params, fence);
-
-error_unlock:
- amdgpu_vm_eviction_unlock(vm);
- return r;
-}
-
-/**
- * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
- *
- * @adev: amdgpu_device pointer
- * @resv: fences we need to sync to
- * @pages_addr: DMA addresses to use for mapping
- * @vm: requested vm
- * @mapping: mapped range and flags to use for the update
- * @flags: HW flags for the mapping
- * @bo_adev: amdgpu_device pointer that bo actually been allocated
- * @nodes: array of drm_mm_nodes with the MC addresses
- * @fence: optional resulting fence
- *
- * Split the mapping into smaller chunks so that each update fits
- * into a SDMA IB.
- *
- * Returns:
- * 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
- struct dma_resv *resv,
- dma_addr_t *pages_addr,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_va_mapping *mapping,
- uint64_t flags,
- struct amdgpu_device *bo_adev,
- struct drm_mm_node *nodes,
- struct dma_fence **fence)
-{
- unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
- uint64_t pfn, start = mapping->start;
- int r;
-
- /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
- * but in case of something, we filter the flags in first place
- */
- if (!(mapping->flags & AMDGPU_PTE_READABLE))
- flags &= ~AMDGPU_PTE_READABLE;
- if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
- flags &= ~AMDGPU_PTE_WRITEABLE;
-
- /* Apply ASIC specific mapping flags */
- amdgpu_gmc_get_vm_pte(adev, mapping, &flags);
-
- trace_amdgpu_vm_bo_update(mapping);
-
- pfn = mapping->offset >> PAGE_SHIFT;
- if (nodes) {
- while (pfn >= nodes->size) {
- pfn -= nodes->size;
- ++nodes;
- }
- }
-
do {
- dma_addr_t *dma_addr = NULL;
- uint64_t max_entries;
- uint64_t addr, last;
+ uint64_t tmp, num_entries, addr;
+
- max_entries = mapping->last - start + 1;
+ num_entries = last - start + 1;
if (nodes) {
addr = nodes->start << PAGE_SHIFT;
- max_entries = min((nodes->size - pfn) *
- AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries);
+ num_entries = min((nodes->size - pfn) *
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
} else {
addr = 0;
}
if (pages_addr) {
- uint64_t count;
+ bool contiguous = true;
- for (count = 1;
- count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
- ++count) {
- uint64_t idx = pfn + count;
+ if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+ uint64_t count;
- if (pages_addr[idx] !=
- (pages_addr[idx - 1] + PAGE_SIZE))
- break;
+ contiguous = pages_addr[pfn + 1] ==
+ pages_addr[pfn] + PAGE_SIZE;
+
+ tmp = num_entries /
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ for (count = 2; count < tmp; ++count) {
+ uint64_t idx = pfn + count;
+
+ if (contiguous != (pages_addr[idx] ==
+ pages_addr[idx - 1] + PAGE_SIZE))
+ break;
+ }
+ num_entries = count *
+ AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
- if (count < min_linear_pages) {
+ if (!contiguous) {
addr = pfn << PAGE_SHIFT;
- dma_addr = pages_addr;
+ params.pages_addr = pages_addr;
} else {
addr = pages_addr[pfn];
- max_entries = count *
- AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ params.pages_addr = NULL;
}
} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
@@ -1738,23 +1693,25 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
addr += pfn << PAGE_SHIFT;
}
- last = start + max_entries - 1;
- r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
- start, last, flags, addr,
- dma_addr, fence);
+ tmp = start + num_entries;
+ r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
if (r)
- return r;
+ goto error_unlock;
- pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+ pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
if (nodes && nodes->size == pfn) {
pfn = 0;
++nodes;
}
- start = last + 1;
+ start = tmp;
- } while (unlikely(start != mapping->last + 1));
+ } while (unlikely(start != last + 1));
- return 0;
+ r = vm->update_funcs->commit(&params, fence);
+
+error_unlock:
+ amdgpu_vm_eviction_unlock(vm);
+ return r;
}
/**
@@ -1790,7 +1747,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
resv = vm->root.base.bo->tbo.base.resv;
} else {
struct drm_gem_object *obj = &bo->tbo.base;
- struct ttm_dma_tt *ttm;
resv = bo->tbo.base.resv;
if (obj->import_attach && bo_va->is_xgmi) {
@@ -1803,10 +1759,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
}
mem = &bo->tbo.mem;
nodes = mem->mm_node;
- if (mem->mem_type == TTM_PL_TT) {
- ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
- pages_addr = ttm->dma_address;
- }
+ if (mem->mem_type == TTM_PL_TT)
+ pages_addr = bo->tbo.ttm->dma_address;
}
if (bo) {
@@ -1835,9 +1789,26 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
}
list_for_each_entry(mapping, &bo_va->invalids, list) {
- r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm,
- mapping, flags, bo_adev, nodes,
- last_update);
+ uint64_t update_flags = flags;
+
+ /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
+ * but in case of something, we filter the flags in first place
+ */
+ if (!(mapping->flags & AMDGPU_PTE_READABLE))
+ update_flags &= ~AMDGPU_PTE_READABLE;
+ if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+ update_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+ /* Apply ASIC specific mapping flags */
+ amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+
+ trace_amdgpu_vm_bo_update(mapping);
+
+ r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+ resv, mapping->start,
+ mapping->last, update_flags,
+ mapping->offset, nodes,
+ pages_addr, last_update);
if (r)
return r;
}
@@ -2045,9 +2016,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
- r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
- mapping->start, mapping->last,
- init_pte_value, 0, NULL, &f);
+ r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
+ resv, mapping->start,
+ mapping->last, init_pte_value,
+ 0, NULL, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -2166,7 +2138,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
/**
- * amdgpu_vm_bo_insert_mapping - insert a new mapping
+ * amdgpu_vm_bo_insert_map - insert a new mapping
*
* @adev: amdgpu_device pointer
* @bo_va: bo_va to store the address
@@ -2823,7 +2795,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
INIT_LIST_HEAD(&vm->invalidated);
spin_lock_init(&vm->invalidated_lock);
INIT_LIST_HEAD(&vm->freed);
-
+ INIT_LIST_HEAD(&vm->done);
/* create scheduler entities for page table updates */
r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
@@ -3375,8 +3347,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
value = 0;
}
- r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
- addr + 1, flags, value, NULL, NULL);
+ r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
+ addr, flags, value, NULL, NULL,
+ NULL);
if (r)
goto error_unlock;
@@ -3392,3 +3365,99 @@ error_unref:
return false;
}
+
+#if defined(CONFIG_DEBUG_FS)
+/**
+ * amdgpu_debugfs_vm_bo_info - print BO info for the VM
+ *
+ * @vm: Requested VM for printing BO info
+ * @m: debugfs file
+ *
+ * Print BO information in debugfs file for the VM
+ */
+void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
+{
+ struct amdgpu_bo_va *bo_va, *tmp;
+ u64 total_idle = 0;
+ u64 total_evicted = 0;
+ u64 total_relocated = 0;
+ u64 total_moved = 0;
+ u64 total_invalidated = 0;
+ u64 total_done = 0;
+ unsigned int total_idle_objs = 0;
+ unsigned int total_evicted_objs = 0;
+ unsigned int total_relocated_objs = 0;
+ unsigned int total_moved_objs = 0;
+ unsigned int total_invalidated_objs = 0;
+ unsigned int total_done_objs = 0;
+ unsigned int id = 0;
+
+ seq_puts(m, "\tIdle BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_idle += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_idle_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tEvicted BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_evicted += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_evicted_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tRelocated BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_relocated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_relocated_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tMoved BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_moved += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_moved_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tInvalidated BOs:\n");
+ spin_lock(&vm->invalidated_lock);
+ list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_invalidated += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ total_invalidated_objs = id;
+ id = 0;
+
+ seq_puts(m, "\tDone BOs:\n");
+ list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) {
+ if (!bo_va->base.bo)
+ continue;
+ total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+ }
+ spin_unlock(&vm->invalidated_lock);
+ total_done_objs = id;
+
+ seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
+ total_idle_objs);
+ seq_printf(m, "\tTotal evicted size: %12lld\tobjs:\t%d\n", total_evicted,
+ total_evicted_objs);
+ seq_printf(m, "\tTotal relocated size: %12lld\tobjs:\t%d\n", total_relocated,
+ total_relocated_objs);
+ seq_printf(m, "\tTotal moved size: %12lld\tobjs:\t%d\n", total_moved,
+ total_moved_objs);
+ seq_printf(m, "\tTotal invalidated size: %12lld\tobjs:\t%d\n", total_invalidated,
+ total_invalidated_objs);
+ seq_printf(m, "\tTotal done size: %12lld\tobjs:\t%d\n", total_done,
+ total_done_objs);
+}
+#endif