From 9296435729dc8a2fd28b42391ff9f1ff310ebb7b Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 21 Aug 2018 15:09:39 +0200
Subject: drm/amdgpu: fix preamble handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

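Set AMDGPU_PREAMBLE_IB_PRESENT_FIRST and ctx->preamble_presented in
amdgpu_cs_submit() instead of amdgpu_cs_ib_fill(). While the IBs are
being filled the command submission can still be interrupted, so the
per-context state must not be updated at that point.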

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 502b94fb116a..09703c87d676 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1012,13 +1012,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
-		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
-			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
-			if (!parser->ctx->preamble_presented) {
-				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
-				parser->ctx->preamble_presented = true;
-			}
-		}
+		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+			parser->job->preamble_status |=
+				AMDGPU_PREAMBLE_IB_PRESENT;
 
 		if (parser->ring && parser->ring != ring)
 			return -EINVAL;
@@ -1241,6 +1237,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_post_dependencies(p);
 
+	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+	    !p->ctx->preamble_presented) {
+		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+		p->ctx->preamble_presented = true;
+	}
+
 	cs->out.handle = seq;
 	job->uf_sequence = seq;
 
-- 
cgit 


From 2cddc50e98193f2c4aab10d05550b5ffe7587e73 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 13 Aug 2018 11:41:35 -0500
Subject: drm/amdgpu: move gem definitions into amdgpu_gem header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

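Untangle amdgpu.h: move the GEM definitions and prototypes into a new
amdgpu_gem.h header and include it where it is needed.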

Signed-off-by: Huang Rui <ray.huang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       | 61 +-------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h   | 92 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c     |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c     |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c     |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c     |  1 +
 11 files changed, 102 insertions(+), 60 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0568140e38b3..7261068f9cca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -77,6 +77,7 @@
 #include "amdgpu_debugfs.h"
 #include "amdgpu_job.h"
 #include "amdgpu_bo_list.h"
+#include "amdgpu_gem.h"
 
 /*
  * Modules parameters.
@@ -302,34 +303,6 @@ struct amdgpu_clock {
 	uint32_t max_pixel_clock;
 };
 
-/*
- * GEM.
- */
-
-#define AMDGPU_GEM_DOMAIN_MAX		0x3
-#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
-
-void amdgpu_gem_object_free(struct drm_gem_object *obj);
-int amdgpu_gem_object_open(struct drm_gem_object *obj,
-				struct drm_file *file_priv);
-void amdgpu_gem_object_close(struct drm_gem_object *obj,
-				struct drm_file *file_priv);
-unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg);
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
-					struct drm_gem_object *gobj,
-					int flags);
-struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
-					    struct dma_buf *dma_buf);
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
-void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
-void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
 /* sub-allocation manager, it has to be protected by another lock.
  * By conception this is an helper for other part of the driver
  * like the indirect buffer or semaphore, which both have their
@@ -379,22 +352,6 @@ struct amdgpu_sa_bo {
 	struct dma_fence	        *fence;
 };
 
-/*
- * GEM objects.
- */
-void amdgpu_gem_force_release(struct amdgpu_device *adev);
-int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-			     int alignment, u32 initial_domain,
-			     u64 flags, enum ttm_bo_type type,
-			     struct reservation_object *resv,
-			     struct drm_gem_object **obj);
-
-int amdgpu_mode_dumb_create(struct drm_file *file_priv,
-			    struct drm_device *dev,
-			    struct drm_mode_create_dumb *args);
-int amdgpu_mode_dumb_mmap(struct drm_file *filp,
-			  struct drm_device *dev,
-			  uint32_t handle, uint64_t *offset_p);
 int amdgpu_fence_slab_init(void);
 void amdgpu_fence_slab_fini(void);
 
@@ -791,23 +748,9 @@ struct amdgpu_asic_funcs {
 /*
  * IOCTL.
  */
-int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *filp);
 int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 
-int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *filp);
-int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *filp);
-int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *filp);
-int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *filp);
-int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *filp);
-int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *filp);
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *filp);
@@ -815,8 +758,6 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
 
-int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *filp);
 
 /* VRAM scratch page for HDP bug, default vram page */
 struct amdgpu_vram_scratch {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 09703c87d676..dc3b2f980d87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -32,6 +32,7 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_gmc.h"
+#include "amdgpu_gem.h"
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8843a06360fa..75c9433ef300 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -36,6 +36,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
+#include "amdgpu_gem.h"
 
 #include "amdgpu_amdkfd.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 69c5d22f29bd..5cbde74b97dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -33,6 +33,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "cikd.h"
+#include "amdgpu_gem.h"
 
 #include <drm/drm_fb_helper.h>
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
new file mode 100644
index 000000000000..d63daba9b17c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_GEM_H__
+#define __AMDGPU_GEM_H__
+
+#include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+
+/*
+ * GEM.
+ */
+
+#define AMDGPU_GEM_DOMAIN_MAX		0x3
+#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base)
+
+void amdgpu_gem_object_free(struct drm_gem_object *obj);
+int amdgpu_gem_object_open(struct drm_gem_object *obj,
+				struct drm_file *file_priv);
+void amdgpu_gem_object_close(struct drm_gem_object *obj,
+				struct drm_file *file_priv);
+unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
+struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+				 struct dma_buf_attachment *attach,
+				 struct sg_table *sg);
+struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
+					struct drm_gem_object *gobj,
+					int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
+struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
+void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
+void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+
+/*
+ * GEM objects.
+ */
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
+int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
+			     int alignment, u32 initial_domain,
+			     u64 flags, enum ttm_bo_type type,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
+
+int amdgpu_mode_dumb_create(struct drm_file *file_priv,
+			    struct drm_device *dev,
+			    struct drm_mode_create_dumb *args);
+int amdgpu_mode_dumb_mmap(struct drm_file *filp,
+			  struct drm_device *dev,
+			  uint32_t handle, uint64_t *offset_p);
+
+int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *filp);
+int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *filp);
+int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *filp);
+int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *filp);
+int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *filp);
+
+int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index bd98cc5fb97b..20645ea719b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -37,6 +37,7 @@
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
 
 /**
  * amdgpu_driver_unload_kms - Main unload function for KMS.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 1c5d97f4b4dd..2686297e34e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -35,6 +35,7 @@
 
 #include "amdgpu.h"
 #include "amdgpu_display.h"
+#include "amdgpu_gem.h"
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index ad151fefa41f..0a0a4dcbea2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "gmc_v6_0.h"
 #include "amdgpu_ucode.h"
+#include "amdgpu_gem.h"
 
 #include "bif/bif_3_0_d.h"
 #include "bif/bif_3_0_sh_mask.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index f8d8a3a73e42..93ea19456e91 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -29,6 +29,7 @@
 #include "gmc_v7_0.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9333109b210d..24dd86725b6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -27,6 +27,7 @@
 #include "gmc_v8_0.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_gem.h"
 
 #include "gmc/gmc_8_1_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 72f8018fa2a8..7300be4816a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -25,6 +25,7 @@
 #include "amdgpu.h"
 #include "gmc_v9_0.h"
 #include "amdgpu_atomfirmware.h"
+#include "amdgpu_gem.h"
 
 #include "hdp/hdp_4_0_offset.h"
 #include "hdp/hdp_4_0_sh_mask.h"
-- 
cgit 


From 869a53d4d7d7976d039b9389aa90b6f3d29ed234 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 16 Jul 2018 15:19:20 +0200
Subject: drm/amdgpu: remove the queue manager
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Not needed any more since that is now done by the scheduler.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  27 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  22 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |  67 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 316 --------------------------
 5 files changed, 75 insertions(+), 360 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 7d7faaf299ef..860cb8731c7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -51,8 +51,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \
-	amdgpu_ids.o
+	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 07924d41ee89..20e81df5cd94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -477,29 +477,6 @@ struct amdgpu_ib {
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
-/*
- * Queue manager
- */
-struct amdgpu_queue_mapper {
-	int 		hw_ip;
-	struct mutex	lock;
-	/* protected by lock */
-	struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
-};
-
-struct amdgpu_queue_mgr {
-	struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
-};
-
-int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr);
-int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr);
-int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
-			 struct amdgpu_queue_mgr *mgr,
-			 u32 hw_ip, u32 instance, u32 ring,
-			 struct amdgpu_ring **out_ring);
-
 /*
  * context related structures
  */
@@ -513,7 +490,6 @@ struct amdgpu_ctx_ring {
 struct amdgpu_ctx {
 	struct kref		refcount;
 	struct amdgpu_device    *adev;
-	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
 	unsigned        reset_counter_query;
 	uint32_t		vram_lost_counter;
@@ -537,6 +513,9 @@ struct amdgpu_ctx_mgr {
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
+int amdgpu_ctx_get_ring(struct amdgpu_ctx *ctx,
+			u32 hw_ip, u32 instance, u32 ring,
+			struct amdgpu_ring **out_ring);
 int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 			      struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc3b2f980d87..55667ab4fbf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1008,8 +1008,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 				return -EINVAL;
 		}
 
-		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
-					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
+		r = amdgpu_ctx_get_ring(parser->ctx, chunk_ib->ip_type,
+					chunk_ib->ip_instance, chunk_ib->ring,
+					&ring);
 		if (r)
 			return r;
 
@@ -1067,10 +1068,9 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr,
-					 deps[i].ip_type,
-					 deps[i].ip_instance,
-					 deps[i].ring, &ring);
+		r = amdgpu_ctx_get_ring(ctx, deps[i].ip_type,
+					deps[i].ip_instance,
+					deps[i].ring, &ring);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
@@ -1331,7 +1331,6 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
-	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
@@ -1342,9 +1341,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
-				 wait->in.ip_type, wait->in.ip_instance,
-				 wait->in.ring, &ring);
+	r = amdgpu_ctx_get_ring(ctx, wait->in.ip_type, wait->in.ip_instance,
+				wait->in.ring, &ring);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
@@ -1391,8 +1389,8 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
-				 user->ip_instance, user->ring, &ring);
+	r = amdgpu_ctx_get_ring(ctx, user->ip_type, user->ip_instance,
+				user->ring, &ring);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a078e68e0319..e5acc72b05d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -121,10 +121,6 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			goto failed;
 	}
 
-	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
-	if (r)
-		goto failed;
-
 	return 0;
 
 failed:
@@ -150,13 +146,86 @@ static void amdgpu_ctx_fini(struct kref *ref)
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 
-	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
-
 	mutex_destroy(&ctx->lock);
 
 	kfree(ctx);
 }
 
+/**
+ * amdgpu_ctx_get_ring - translate a userspace ring id into an amdgpu_ring
+ *
+ * @ctx: context the request belongs to; the rings of ctx->adev are used
+ * @hw_ip: AMDGPU_HW_IP_* type of the hardware block
+ * @instance: instance of the hardware block, only 0 is accepted
+ * @ring: user supplied ring index, checked against the rings of @hw_ip
+ * @out_ring: where the resulting ring pointer is stored
+ *
+ * Returns 0 on success, -EINVAL when @instance is not 0, @hw_ip is unknown
+ * or @ring is not a valid index for @hw_ip. *@out_ring may already have been
+ * written when -EINVAL is returned and must not be used in that case.
+ */
+int amdgpu_ctx_get_ring(struct amdgpu_ctx *ctx,
+			u32 hw_ip, u32 instance, u32 ring,
+			struct amdgpu_ring **out_ring)
+{
+	struct amdgpu_device *adev = ctx->adev;
+	unsigned num_rings = 0;
+
+	/* Right now all IPs have only one instance - multiple rings. */
+	if (instance != 0) {
+		DRM_DEBUG("invalid ip instance: %d\n", instance);
+		return -EINVAL;
+	}
+
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		*out_ring = &adev->gfx.gfx_ring[ring];
+		num_rings = adev->gfx.num_gfx_rings;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		*out_ring = &adev->gfx.compute_ring[ring];
+		num_rings = adev->gfx.num_compute_rings;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		*out_ring = &adev->sdma.instance[ring].ring;
+		num_rings = adev->sdma.num_instances;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		*out_ring = &adev->uvd.inst[0].ring;
+		num_rings = adev->uvd.num_uvd_inst;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		*out_ring = &adev->vce.ring[ring];
+		num_rings = adev->vce.num_rings;
+		break;
+	case AMDGPU_HW_IP_UVD_ENC:
+		*out_ring = &adev->uvd.inst[0].ring_enc[ring];
+		num_rings = adev->uvd.num_enc_rings;
+		break;
+	case AMDGPU_HW_IP_VCN_DEC:
+		*out_ring = &adev->vcn.ring_dec;
+		num_rings = 1;
+		break;
+	case AMDGPU_HW_IP_VCN_ENC:
+		*out_ring = &adev->vcn.ring_enc[ring];
+		num_rings = adev->vcn.num_enc_rings;
+		break;
+	case AMDGPU_HW_IP_VCN_JPEG:
+		*out_ring = &adev->vcn.ring_jpeg;
+		num_rings = 1;
+		break;
+	default:
+		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+		return -EINVAL;
+	}
+
+	/* Valid ring indices are 0 .. num_rings - 1. */
+	if (ring >= num_rings)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
 			    struct drm_file *filp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
deleted file mode 100644
index a172bba32b45..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ /dev/null
@@ -1,316 +0,0 @@
-/*
- * Copyright 2017 Valve Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Andres Rodriguez
- */
-
-#include "amdgpu.h"
-#include "amdgpu_ring.h"
-
-static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
-				    int hw_ip)
-{
-	if (!mapper)
-		return -EINVAL;
-
-	if (hw_ip > AMDGPU_MAX_IP_NUM)
-		return -EINVAL;
-
-	mapper->hw_ip = hw_ip;
-	mutex_init(&mapper->lock);
-
-	memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
-
-	return 0;
-}
-
-static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
-					  int ring)
-{
-	return mapper->queue_map[ring];
-}
-
-static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
-			     int ring, struct amdgpu_ring *pring)
-{
-	if (WARN_ON(mapper->queue_map[ring])) {
-		DRM_ERROR("Un-expected ring re-map\n");
-		return -EINVAL;
-	}
-
-	mapper->queue_map[ring] = pring;
-
-	return 0;
-}
-
-static int amdgpu_identity_map(struct amdgpu_device *adev,
-			       struct amdgpu_queue_mapper *mapper,
-			       u32 ring,
-			       struct amdgpu_ring **out_ring)
-{
-	switch (mapper->hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-		*out_ring = &adev->gfx.gfx_ring[ring];
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		*out_ring = &adev->gfx.compute_ring[ring];
-		break;
-	case AMDGPU_HW_IP_DMA:
-		*out_ring = &adev->sdma.instance[ring].ring;
-		break;
-	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.inst[0].ring;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		*out_ring = &adev->vce.ring[ring];
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		*out_ring = &adev->uvd.inst[0].ring_enc[ring];
-		break;
-	case AMDGPU_HW_IP_VCN_DEC:
-		*out_ring = &adev->vcn.ring_dec;
-		break;
-	case AMDGPU_HW_IP_VCN_ENC:
-		*out_ring = &adev->vcn.ring_enc[ring];
-		break;
-	case AMDGPU_HW_IP_VCN_JPEG:
-		*out_ring = &adev->vcn.ring_jpeg;
-		break;
-	default:
-		*out_ring = NULL;
-		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
-		return -EINVAL;
-	}
-
-	return amdgpu_update_cached_map(mapper, ring, *out_ring);
-}
-
-static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
-{
-	switch (hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-		return AMDGPU_RING_TYPE_GFX;
-	case AMDGPU_HW_IP_COMPUTE:
-		return AMDGPU_RING_TYPE_COMPUTE;
-	case AMDGPU_HW_IP_DMA:
-		return AMDGPU_RING_TYPE_SDMA;
-	case AMDGPU_HW_IP_UVD:
-		return AMDGPU_RING_TYPE_UVD;
-	case AMDGPU_HW_IP_VCE:
-		return AMDGPU_RING_TYPE_VCE;
-	default:
-		DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
-		return -1;
-	}
-}
-
-static int amdgpu_lru_map(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mapper *mapper,
-			  u32 user_ring, bool lru_pipe_order,
-			  struct amdgpu_ring **out_ring)
-{
-	int r, i, j;
-	int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
-	int ring_blacklist[AMDGPU_MAX_RINGS];
-	struct amdgpu_ring *ring;
-
-	/* 0 is a valid ring index, so initialize to -1 */
-	memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
-
-	for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
-		ring = mapper->queue_map[i];
-		if (ring)
-			ring_blacklist[j++] = ring->idx;
-	}
-
-	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
-				j, lru_pipe_order, out_ring);
-	if (r)
-		return r;
-
-	return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
-}
-
-/**
- * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- *
- * Initialize the the selected @mgr (all asics).
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr)
-{
-	int i, r;
-
-	if (!adev || !mgr)
-		return -EINVAL;
-
-	memset(mgr, 0, sizeof(*mgr));
-
-	for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
-		r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-/**
- * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- *
- * De-initialize the the selected @mgr (all asics).
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
-			  struct amdgpu_queue_mgr *mgr)
-{
-	return 0;
-}
-
-/**
- * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
- *
- * @adev: amdgpu_device pointer
- * @mgr: amdgpu_queue_mgr structure holding queue information
- * @hw_ip: HW IP enum
- * @instance: HW instance
- * @ring: user ring id
- * @our_ring: pointer to mapped amdgpu_ring
- *
- * Map a userspace ring id to an appropriate kernel ring. Different
- * policies are configurable at a HW IP level.
- *
- * Returns 0 on success, error on failure.
- */
-int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
-			 struct amdgpu_queue_mgr *mgr,
-			 u32 hw_ip, u32 instance, u32 ring,
-			 struct amdgpu_ring **out_ring)
-{
-	int i, r, ip_num_rings = 0;
-	struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
-
-	if (!adev || !mgr || !out_ring)
-		return -EINVAL;
-
-	if (hw_ip >= AMDGPU_MAX_IP_NUM)
-		return -EINVAL;
-
-	if (ring >= AMDGPU_MAX_RINGS)
-		return -EINVAL;
-
-	/* Right now all IPs have only one instance - multiple rings. */
-	if (instance != 0) {
-		DRM_DEBUG("invalid ip instance: %d\n", instance);
-		return -EINVAL;
-	}
-
-	switch (hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-		ip_num_rings = adev->gfx.num_gfx_rings;
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		ip_num_rings = adev->gfx.num_compute_rings;
-		break;
-	case AMDGPU_HW_IP_DMA:
-		ip_num_rings = adev->sdma.num_instances;
-		break;
-	case AMDGPU_HW_IP_UVD:
-		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-			if (!(adev->uvd.harvest_config & (1 << i)))
-				ip_num_rings++;
-		}
-		break;
-	case AMDGPU_HW_IP_VCE:
-		ip_num_rings = adev->vce.num_rings;
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-			if (!(adev->uvd.harvest_config & (1 << i)))
-				ip_num_rings++;
-		}
-		ip_num_rings =
-			adev->uvd.num_enc_rings * ip_num_rings;
-		break;
-	case AMDGPU_HW_IP_VCN_DEC:
-		ip_num_rings = 1;
-		break;
-	case AMDGPU_HW_IP_VCN_ENC:
-		ip_num_rings = adev->vcn.num_enc_rings;
-		break;
-	case AMDGPU_HW_IP_VCN_JPEG:
-		ip_num_rings = 1;
-		break;
-	default:
-		DRM_DEBUG("unknown ip type: %d\n", hw_ip);
-		return -EINVAL;
-	}
-
-	if (ring >= ip_num_rings) {
-		DRM_DEBUG("Ring index:%d exceeds maximum:%d for ip:%d\n",
-			  ring, ip_num_rings, hw_ip);
-		return -EINVAL;
-	}
-
-	mutex_lock(&mapper->lock);
-
-	*out_ring = amdgpu_get_cached_map(mapper, ring);
-	if (*out_ring) {
-		/* cache hit */
-		r = 0;
-		goto out_unlock;
-	}
-
-	switch (mapper->hw_ip) {
-	case AMDGPU_HW_IP_GFX:
-	case AMDGPU_HW_IP_UVD:
-	case AMDGPU_HW_IP_VCE:
-	case AMDGPU_HW_IP_UVD_ENC:
-	case AMDGPU_HW_IP_VCN_DEC:
-	case AMDGPU_HW_IP_VCN_ENC:
-	case AMDGPU_HW_IP_VCN_JPEG:
-		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
-		break;
-	case AMDGPU_HW_IP_DMA:
-		r = amdgpu_lru_map(adev, mapper, ring, false, out_ring);
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		r = amdgpu_lru_map(adev, mapper, ring, true, out_ring);
-		break;
-	default:
-		*out_ring = NULL;
-		r = -EINVAL;
-		DRM_DEBUG("unknown HW IP type: %d\n", mapper->hw_ip);
-	}
-
-out_unlock:
-	mutex_unlock(&mapper->lock);
-	return r;
-}
-- 
cgit 


From 0d346a14c634120046d194377e2cb5b387a6c1c6 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 19 Jul 2018 14:22:25 +0200
Subject: drm/amdgpu: use entity instead of ring for CS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Further demangle ring from entity handling.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    | 66 ++++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 53 ++++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h   | 16 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  4 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c     |  3 +-
 6 files changed, 78 insertions(+), 66 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 50eeb7c1350e..6265b88135fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -523,7 +523,7 @@ struct amdgpu_cs_parser {
 
 	/* scheduler job object */
 	struct amdgpu_job	*job;
-	struct amdgpu_ring	*ring;
+	struct drm_sched_entity	*entity;
 
 	/* buffer objects */
 	struct ww_acquire_ctx		ticket;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 55667ab4fbf5..313ac971eaaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -893,13 +893,13 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 				 struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring = p->ring;
 	int r;
 
 	/* Only for UVD/VCE VM emulation */
-	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
 		unsigned i, j;
 
 		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@@ -940,7 +940,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 			kptr += va_start - offset;
 
-			if (p->ring->funcs->parse_cs) {
+			if (ring->funcs->parse_cs) {
 				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
 				amdgpu_bo_kunmap(aobj);
 
@@ -979,14 +979,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 {
 	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ring *ring;
+	int i, j;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-		struct amdgpu_ring *ring;
+		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
@@ -1008,9 +1009,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 				return -EINVAL;
 		}
 
-		r = amdgpu_ctx_get_ring(parser->ctx, chunk_ib->ip_type,
-					chunk_ib->ip_instance, chunk_ib->ring,
-					&ring);
+		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+					  chunk_ib->ip_instance, chunk_ib->ring,
+					  &entity);
 		if (r)
 			return r;
 
@@ -1018,14 +1019,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			parser->job->preamble_status |=
 				AMDGPU_PREAMBLE_IB_PRESENT;
 
-		if (parser->ring && parser->ring != ring)
+		if (parser->entity && parser->entity != entity)
 			return -EINVAL;
 
-		parser->ring = ring;
+		parser->entity = entity;
 
-		r =  amdgpu_ib_get(adev, vm,
-					ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
-					ib);
+		ring = to_amdgpu_ring(entity->rq->sched);
+		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+				   chunk_ib->ib_bytes : 0, ib);
 		if (r) {
 			DRM_ERROR("Failed to get ib !\n");
 			return r;
@@ -1039,12 +1040,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
+	ring = to_amdgpu_ring(parser->entity->rq->sched);
 	if (parser->job->uf_addr && (
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	    ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@@ -1060,23 +1062,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 	for (i = 0; i < num_deps; ++i) {
-		struct amdgpu_ring *ring;
 		struct amdgpu_ctx *ctx;
+		struct drm_sched_entity *entity;
 		struct dma_fence *fence;
 
 		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
 		if (ctx == NULL)
 			return -EINVAL;
 
-		r = amdgpu_ctx_get_ring(ctx, deps[i].ip_type,
-					deps[i].ip_instance,
-					deps[i].ring, &ring);
+		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+					  deps[i].ip_instance,
+					  deps[i].ring, &entity);
 		if (r) {
 			amdgpu_ctx_put(ctx);
 			return r;
 		}
 
-		fence = amdgpu_ctx_get_fence(ctx, ring,
+		fence = amdgpu_ctx_get_fence(ctx, entity,
 					     deps[i].handle);
 		if (IS_ERR(fence)) {
 			r = PTR_ERR(fence);
@@ -1195,9 +1197,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ring *ring = p->ring;
-	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
+	struct drm_sched_entity *entity = p->entity;
 	enum drm_sched_priority priority;
+	struct amdgpu_ring *ring;
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
@@ -1227,7 +1229,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
+	r = amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	if (r) {
 		dma_fence_put(p->fence);
 		dma_fence_put(&job->base.s_fence->finished);
@@ -1332,7 +1334,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
-	struct amdgpu_ring *ring = NULL;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
@@ -1341,14 +1343,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 	if (ctx == NULL)
 		return -EINVAL;
 
-	r = amdgpu_ctx_get_ring(ctx, wait->in.ip_type, wait->in.ip_instance,
-				wait->in.ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
+				  wait->in.ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return r;
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+	fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
@@ -1380,7 +1382,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 					     struct drm_file *filp,
 					     struct drm_amdgpu_fence *user)
 {
-	struct amdgpu_ring *ring;
+	struct drm_sched_entity *entity;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
@@ -1389,14 +1391,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
-	r = amdgpu_ctx_get_ring(ctx, user->ip_type, user->ip_instance,
-				user->ring, &ring);
+	r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
+				  user->ring, &entity);
 	if (r) {
 		amdgpu_ctx_put(ctx);
 		return ERR_PTR(r);
 	}
 
-	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
+	fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index e5acc72b05d2..0a6cd1202ee5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -27,6 +27,9 @@
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
 
+#define to_amdgpu_ctx_ring(e)	\
+	container_of((e), struct amdgpu_ctx_ring, entity)
+
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
@@ -151,12 +154,12 @@ static void amdgpu_ctx_fini(struct kref *ref)
 	kfree(ctx);
 }
 
-int amdgpu_ctx_get_ring(struct amdgpu_ctx *ctx,
-			u32 hw_ip, u32 instance, u32 ring,
-			struct amdgpu_ring **out_ring)
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+			  u32 ring, struct drm_sched_entity **entity)
 {
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned num_rings = 0;
+	struct amdgpu_ring *out_ring;
 
 	/* Right now all IPs have only one instance - multiple rings. */
 	if (instance != 0) {
@@ -166,39 +169,39 @@ int amdgpu_ctx_get_ring(struct amdgpu_ctx *ctx,
 
 	switch (hw_ip) {
 	case AMDGPU_HW_IP_GFX:
-		*out_ring = &adev->gfx.gfx_ring[ring];
+		out_ring = &adev->gfx.gfx_ring[ring];
 		num_rings = adev->gfx.num_gfx_rings;
 		break;
 	case AMDGPU_HW_IP_COMPUTE:
-		*out_ring = &adev->gfx.compute_ring[ring];
+		out_ring = &adev->gfx.compute_ring[ring];
 		num_rings = adev->gfx.num_compute_rings;
 		break;
 	case AMDGPU_HW_IP_DMA:
-		*out_ring = &adev->sdma.instance[ring].ring;
+		out_ring = &adev->sdma.instance[ring].ring;
 		num_rings = adev->sdma.num_instances;
 		break;
 	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.inst[0].ring;
+		out_ring = &adev->uvd.inst[0].ring;
 		num_rings = adev->uvd.num_uvd_inst;
 		break;
 	case AMDGPU_HW_IP_VCE:
-		*out_ring = &adev->vce.ring[ring];
+		out_ring = &adev->vce.ring[ring];
 		num_rings = adev->vce.num_rings;
 		break;
 	case AMDGPU_HW_IP_UVD_ENC:
-		*out_ring = &adev->uvd.inst[0].ring_enc[ring];
+		out_ring = &adev->uvd.inst[0].ring_enc[ring];
 		num_rings = adev->uvd.num_enc_rings;
 		break;
 	case AMDGPU_HW_IP_VCN_DEC:
-		*out_ring = &adev->vcn.ring_dec;
+		out_ring = &adev->vcn.ring_dec;
 		num_rings = 1;
 		break;
 	case AMDGPU_HW_IP_VCN_ENC:
-		*out_ring = &adev->vcn.ring_enc[ring];
+		out_ring = &adev->vcn.ring_enc[ring];
 		num_rings = adev->vcn.num_enc_rings;
 		break;
 	case AMDGPU_HW_IP_VCN_JPEG:
-		*out_ring = &adev->vcn.ring_jpeg;
+		out_ring = &adev->vcn.ring_jpeg;
 		num_rings = 1;
 		break;
 	default:
@@ -209,6 +212,7 @@ int amdgpu_ctx_get_ring(struct amdgpu_ctx *ctx,
 	if (ring > num_rings)
 		return -EINVAL;
 
+	*entity = &ctx->rings[out_ring->idx].entity;
 	return 0;
 }
 
@@ -414,13 +418,14 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 	return 0;
 }
 
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct dma_fence *fence, uint64_t* handler)
+int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			 struct drm_sched_entity *entity,
+			 struct dma_fence *fence, uint64_t* handle)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+	struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
 	uint64_t seq = cring->sequence;
-	unsigned idx = 0;
 	struct dma_fence *other = NULL;
+	unsigned idx = 0;
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
@@ -435,22 +440,23 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 	spin_unlock(&ctx->ring_lock);
 
 	dma_fence_put(other);
-	if (handler)
-		*handler = seq;
+	if (handle)
+		*handle = seq;
 
 	return 0;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				       struct amdgpu_ring *ring, uint64_t seq)
+				       struct drm_sched_entity *entity,
+				       uint64_t seq)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+	struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->ring_lock);
 
 	if (seq == ~0ull)
-		seq = ctx->rings[ring->idx].sequence - 1;
+		seq = cring->sequence - 1;
 
 	if (seq >= cring->sequence) {
 		spin_unlock(&ctx->ring_lock);
@@ -494,9 +500,10 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 	}
 }
 
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+			       struct drm_sched_entity *entity)
 {
-	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+	struct amdgpu_ctx_ring *cring = to_amdgpu_ctx_ring(entity);
 	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
 	struct dma_fence *other = cring->fences[idx];
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 5664b1f54142..609f925b076c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -61,20 +61,22 @@ struct amdgpu_ctx_mgr {
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
-int amdgpu_ctx_get_ring(struct amdgpu_ctx *ctx,
-			u32 hw_ip, u32 instance, u32 ring,
-			struct amdgpu_ring **out_ring);
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct dma_fence *fence, uint64_t *seq);
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+			  u32 ring, struct drm_sched_entity **entity);
+int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			 struct drm_sched_entity *entity,
+			 struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				   struct amdgpu_ring *ring, uint64_t seq);
+				       struct drm_sched_entity *entity,
+				       uint64_t seq);
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority);
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+			       struct drm_sched_entity *entity);
 
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8c2dab20eb36..2e87414422f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,
 
 	    TP_fast_assign(
 			   __entry->bo_list = p->bo_list;
-			   __entry->ring = p->ring->idx;
+			   __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx;
 			   __entry->dw = p->job->ibs[i].length_dw;
 			   __entry->fences = amdgpu_fence_count_emitted(
-				p->ring);
+				to_amdgpu_ring(p->entity->rq->sched));
 			   ),
 	    TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
 		      __entry->bo_list, __entry->ring, __entry->dw,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 9b7f8469bc5c..e33425513a89 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1264,11 +1264,12 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 					   uint32_t ib_idx)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
 	unsigned i;
 
 	/* No patching necessary for the first instance */
-	if (!p->ring->me)
+	if (!ring->me)
 		return 0;
 
 	for (i = 0; i < ib->length_dw; i += 2) {
-- 
cgit 


From 9a02ece43ee49efdfad19a3ca90c02d20f491031 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 17 Aug 2018 15:07:13 +0200
Subject: drm/amdgpu: cleanup VM handling in the CS a bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a helper function for getting the root PD addr, join the two VM
related functions and clean up the function name.

No functional change.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 160 +++++++++++++++------------------
 1 file changed, 74 insertions(+), 86 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 313ac971eaaf..5b70a30967ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -804,8 +804,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	amdgpu_bo_unref(&parser->uf_entry.robj);
 }
 
-static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 {
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_device *adev = p->adev;
 	struct amdgpu_vm *vm = &fpriv->vm;
@@ -814,6 +815,71 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	struct amdgpu_bo *bo;
 	int r;
 
+	/* Only for UVD/VCE VM emulation */
+	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
+		unsigned i, j;
+
+		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			struct amdgpu_cs_chunk *chunk;
+			uint64_t offset, va_start;
+			struct amdgpu_ib *ib;
+			uint8_t *kptr;
+
+			chunk = &p->chunks[i];
+			ib = &p->job->ibs[j];
+			chunk_ib = chunk->kdata;
+
+			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				continue;
+
+			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((va_start + chunk_ib->ib_bytes) >
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += va_start - offset;
+
+			if (ring->funcs->parse_cs) {
+				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+				amdgpu_bo_kunmap(aobj);
+
+				r = amdgpu_ring_parse_cs(ring, p, j);
+				if (r)
+					return r;
+			} else {
+				ib->ptr = (uint32_t *)kptr;
+				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
+				amdgpu_bo_kunmap(aobj);
+				if (r)
+					return r;
+			}
+
+			j++;
+		}
+	}
+
+	if (!p->job->vm)
+		return amdgpu_cs_sync_rings(p);
+
+
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
 		return r;
@@ -876,6 +942,12 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
+	if (r)
+		return r;
+
+	p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+
 	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
 		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
@@ -887,90 +959,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 		}
 	}
 
-	return r;
-}
-
-static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *p)
-{
-	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	int r;
-
-	/* Only for UVD/VCE VM emulation */
-	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
-		unsigned i, j;
-
-		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
-			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			struct amdgpu_cs_chunk *chunk;
-			uint64_t offset, va_start;
-			struct amdgpu_ib *ib;
-			uint8_t *kptr;
-
-			chunk = &p->chunks[i];
-			ib = &p->job->ibs[j];
-			chunk_ib = chunk->kdata;
-
-			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
-				continue;
-
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
-			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += va_start - offset;
-
-			if (ring->funcs->parse_cs) {
-				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-				amdgpu_bo_kunmap(aobj);
-
-				r = amdgpu_ring_parse_cs(ring, p, j);
-				if (r)
-					return r;
-			} else {
-				ib->ptr = (uint32_t *)kptr;
-				r = amdgpu_ring_patch_cs_in_place(ring, p, j);
-				amdgpu_bo_kunmap(aobj);
-				if (r)
-					return r;
-			}
-
-			j++;
-		}
-	}
-
-	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-
-		r = amdgpu_bo_vm_update_pte(p);
-		if (r)
-			return r;
-
-		r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
-		if (r)
-			return r;
-	}
-
 	return amdgpu_cs_sync_rings(p);
 }
 
@@ -1309,7 +1297,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
+	r = amdgpu_cs_vm_handling(&parser);
 	if (r)
 		goto out;
 
-- 
cgit 


From 11c3a249ff7a1c710011bd06a451956f2a40c30c Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 22 Aug 2018 12:22:14 +0200
Subject: drm/amdgpu: add amdgpu_gmc_pd_addr helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a helper to get the root PD address and remove the workarounds from
the GMC9 code for that.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile              |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  5 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c          | 47 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h          |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c         |  7 +---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c            |  4 --
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c          |  7 +---
 9 files changed, 56 insertions(+), 23 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 860cb8731c7c..d2bafabe585d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -51,7 +51,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o
+	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
+	amdgpu_gmc.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f92597c292fe..2ef6e8557b65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -364,7 +364,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 	struct amdgpu_bo *pd = vm->root.base.bo;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	struct amdgpu_vm_parser param;
-	uint64_t addr, flags = AMDGPU_PTE_VALID;
 	int ret;
 
 	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
@@ -383,9 +382,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 		return ret;
 	}
 
-	addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
-	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
-	vm->pd_phys_addr = addr;
+	vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 
 	if (vm->use_cpu_for_update) {
 		ret = amdgpu_bo_kmap(pd, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5b70a30967ec..fd3902983195 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -946,7 +946,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 
 	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
new file mode 100644
index 000000000000..36058feac64f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "amdgpu.h"
+
+/**
+ * amdgpu_gmc_pd_addr - return the address of the root directory
+ *
+ */
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	uint64_t pd_addr;
+
+	pd_addr = amdgpu_bo_gpu_offset(bo);
+	/* TODO: move that into ASIC specific code */
+	if (adev->asic_type >= CHIP_VEGA10) {
+		uint64_t flags = AMDGPU_PTE_VALID;
+
+		amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags);
+		pd_addr |= flags;
+	}
+	return pd_addr;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 64391d811a82..1c6974a33467 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -133,4 +133,6 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
 	return (gmc->real_vram_size == gmc->visible_vram_size);
 }
 
+uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e7f73deed975..eb08a03b82a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2049,7 +2049,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		return r;
 
 	if (vm_needs_flush) {
-		job->vm_pd_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
 		job->vm_needs_flush = true;
 	}
 	if (resv) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 2baab7e69ef5..3403ded39d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -37,12 +37,7 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev)
 
 static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
 {
-	uint64_t value = amdgpu_bo_gpu_offset(adev->gart.bo);
-
-	BUG_ON(value & (~0x0000FFFFFFFFF000ULL));
-	value -= adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
-	value &= 0x0000FFFFFFFFF000ULL;
-	value |= 0x1; /*valid bit*/
+	uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
 
 	WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
 		     lower_32_bits(value));
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index dc48e19d01f8..a82b3eb429e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -429,12 +429,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
-	uint64_t flags = AMDGPU_PTE_VALID;
 	unsigned eng = ring->vm_inv_eng;
 
-	amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags);
-	pd_addr |= flags;
-
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
 			      lower_32_bits(pd_addr));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 800ec4687f13..5f6a9c85488f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -47,12 +47,7 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev)
 
 static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
 {
-	uint64_t value = amdgpu_bo_gpu_offset(adev->gart.bo);
-
-	BUG_ON(value & (~0x0000FFFFFFFFF000ULL));
-	value -= adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
-	value &= 0x0000FFFFFFFFF000ULL;
-	value |= 0x1; /* valid bit */
+	uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo);
 
 	WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
 		     lower_32_bits(value));
-- 
cgit 


From f921661bd4a112f80d57bbfb3e792da63787f4b0 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 6 Aug 2018 10:57:08 +0800
Subject: drm/amdgpu: use bulk moves for efficient VM LRU handling (v6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This continues the work on bulk moving, based on the proposal by Christian.

Background:
The amdgpu driver moves all PD/PT and PerVM BOs into the idle list, then moves
all of them to the end of the LRU list one by one. This causes a large number
of BOs to be moved to the end of the LRU and seriously impacts performance.

Christian then provided a workaround that avoids moving PD/PT BOs on the LRU,
with the patch below:
Commit 0bbf32026cf5ba41e9922b30e26e1bed1ecd38ae ("drm/amdgpu: band aid
validating VM PTs")

However, the final solution should bulk move all PD/PT and PerVM BOs on the LRU
instead of one by one.

Whenever amdgpu_vm_validate_pt_bos() is called and we have BOs which need to be
validated we move all BOs together to the end of the LRU without dropping the
lock for the LRU.

While doing so we note the beginning and end of this block in the LRU list.

Now when amdgpu_vm_validate_pt_bos() is called and we don't have anything to do,
we don't move every BO one by one, but instead cut the LRU list into pieces so
that we bulk move everything to the end in just one operation.

Test data:
+--------------+-----------------+-----------+---------------------------------------+
|              |The Talos        |Clpeak(OCL)|BusSpeedReadback(OCL)                  |
|              |Principle(Vulkan)|           |                                       |
+------------------------------------------------------------------------------------+
|              |                 |           |0.319 ms(1k) 0.314 ms(2K) 0.308 ms(4K) |
| Original     |  147.7 FPS      |  76.86 us |0.307 ms(8K) 0.310 ms(16K)             |
+------------------------------------------------------------------------------------+
| Original + WA|                 |           |0.254 ms(1K) 0.241 ms(2K)              |
|(don't move   |  162.1 FPS      |  42.15 us |0.230 ms(4K) 0.223 ms(8K) 0.204 ms(16K)|
|PT BOs on LRU)|                 |           |                                       |
+------------------------------------------------------------------------------------+
| Bulk move    |  163.1 FPS      |  40.52 us |0.244 ms(1K) 0.252 ms(2K) 0.213 ms(4K) |
|              |                 |           |0.214 ms(8K) 0.225 ms(16K)             |
+--------------+-----------------+-----------+---------------------------------------+

After testing with the above three benchmarks, covering both Vulkan and OpenCL,
we can see a visible improvement over the original, and results even better
than the original with the workaround.

v2: move all BOs, including those on the idle, relocated, and moved lists, to
the end of the LRU and put them together.
v3: remove unused parameter and use list_for_each_entry instead of the one with
save entry.
v4: move the amdgpu_vm_move_to_lru_tail after command submission, at that time,
all bo will be back on idle list.
v5: remove amdgpu_vm_move_to_lru_tail_by_list(), use bulk_moveable instead of
validated, and move ttm_bo_bulk_move_lru_tail() also into
amdgpu_vm_move_to_lru_tail().
v6: clean up and fix return value.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Tested-by: Mike Lothian <mike@fireburn.co.uk>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Acked-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 66 ++++++++++++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++++-
 3 files changed, 57 insertions(+), 23 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fd3902983195..b62bbe71662d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1259,6 +1259,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
+	struct amdgpu_fpriv *fpriv;
 	int i, r;
 
 	if (!adev->accel_working)
@@ -1303,6 +1304,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	r = amdgpu_cs_submit(&parser, cs);
 
+	fpriv = filp->driver_priv;
+	amdgpu_vm_move_to_lru_tail(adev, &fpriv->vm);
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d12bffa5f70c..7b0fdf5c79f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -267,6 +267,47 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	list_add(&entry->tv.head, validated);
 }
 
+/**
+ * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
+ *
+ * @adev: amdgpu device pointer
+ * @vm: vm providing the BOs
+ *
+ * Move all BOs to the end of LRU and remember their positions to put them
+ * together.
+ */
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm)
+{
+	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	struct amdgpu_vm_bo_base *bo_base;
+
+	if (vm->bulk_moveable) {
+		spin_lock(&glob->lru_lock);
+		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
+		spin_unlock(&glob->lru_lock);
+		return;
+	}
+
+	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
+
+	spin_lock(&glob->lru_lock);
+	list_for_each_entry(bo_base, &vm->idle, vm_status) {
+		struct amdgpu_bo *bo = bo_base->bo;
+
+		if (!bo->parent)
+			continue;
+
+		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
+		if (bo->shadow)
+			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
+						&vm->lru_bulk_move);
+	}
+	spin_unlock(&glob->lru_lock);
+
+	vm->bulk_moveable = true;
+}
+
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -284,10 +325,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*validate)(void *p, struct amdgpu_bo *bo),
 			      void *param)
 {
-	struct ttm_bo_global *glob = adev->mman.bdev.glob;
 	struct amdgpu_vm_bo_base *bo_base, *tmp;
 	int r = 0;
 
+	vm->bulk_moveable &= list_empty(&vm->evicted);
+
 	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
 		struct amdgpu_bo *bo = bo_base->bo;
 
@@ -295,14 +337,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		if (r)
 			break;
 
-		if (bo->parent) {
-			spin_lock(&glob->lru_lock);
-			ttm_bo_move_to_lru_tail(&bo->tbo, NULL);
-			if (bo->shadow)
-				ttm_bo_move_to_lru_tail(&bo->shadow->tbo, NULL);
-			spin_unlock(&glob->lru_lock);
-		}
-
 		if (bo->tbo.type != ttm_bo_type_kernel) {
 			spin_lock(&vm->moved_lock);
 			list_move(&bo_base->vm_status, &vm->moved);
@@ -312,19 +346,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 	}
 
-	spin_lock(&glob->lru_lock);
-	list_for_each_entry(bo_base, &vm->idle, vm_status) {
-		struct amdgpu_bo *bo = bo_base->bo;
-
-		if (!bo->parent)
-			continue;
-
-		ttm_bo_move_to_lru_tail(&bo->tbo, NULL);
-		if (bo->shadow)
-			ttm_bo_move_to_lru_tail(&bo->shadow->tbo, NULL);
-	}
-	spin_unlock(&glob->lru_lock);
-
 	return r;
 }
 
@@ -2590,6 +2611,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return r;
 
 	vm->pte_support_ats = false;
+	vm->bulk_moveable = true;
 
 	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 1162c2bf3138..14bafe771c9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -29,6 +29,7 @@
 #include <linux/rbtree.h>
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_bo_driver.h>
 
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -247,6 +248,11 @@ struct amdgpu_vm {
 
 	/* Some basic info about the task */
 	struct amdgpu_task_info task_info;
+
+	/* Store positions of group of BOs */
+	struct ttm_lru_bulk_move lru_bulk_move;
+	/* mark whether can do the bulk move */
+	bool			bulk_moveable;
 };
 
 struct amdgpu_vm_manager {
@@ -354,8 +360,11 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
-			 struct amdgpu_task_info *task_info);
+			     struct amdgpu_task_info *task_info);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm);
+
 #endif
-- 
cgit 


From 85eff20020a656b2d13b33dc4681523508fee037 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 24 Aug 2018 14:23:33 +0200
Subject: drm/amdgpu: amdgpu_ctx_add_fence can't fail
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No more waiting for a fence is done here.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 +---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  8 +++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  6 +++---
 3 files changed, 7 insertions(+), 17 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b62bbe71662d..adc6a43e2333 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1217,15 +1217,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
-	r = amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
-	if (r) {
-		dma_fence_put(p->fence);
-		dma_fence_put(&job->base.s_fence->finished);
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
+	amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
 	amdgpu_cs_post_dependencies(p);
 
 	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 987b7f256463..f9b54236102d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -434,9 +434,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 	return 0;
 }
 
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
-			 struct drm_sched_entity *entity,
-			 struct dma_fence *fence, uint64_t* handle)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			  struct drm_sched_entity *entity,
+			  struct dma_fence *fence, uint64_t* handle)
 {
 	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 	uint64_t seq = centity->sequence;
@@ -458,8 +458,6 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
 	dma_fence_put(other);
 	if (handle)
 		*handle = seq;
-
-	return 0;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index d67c1d285a4f..b3b012c0a7da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -65,9 +65,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity);
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
-			 struct drm_sched_entity *entity,
-			 struct dma_fence *fence, uint64_t *seq);
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			  struct drm_sched_entity *entity,
+			  struct dma_fence *fence, uint64_t *seq);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 				       struct drm_sched_entity *entity,
 				       uint64_t seq);
-- 
cgit 


From 4f9ea1d0d1ed914092d9e03d87d80fa7e63ecc8f Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 24 Aug 2018 14:48:02 +0200
Subject: drm/amdgpu: fix holding mn_lock while allocating memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can't hold the mn_lock while allocating memory.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index adc6a43e2333..dd734970e167 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1194,26 +1194,24 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	int r;
 
+	job = p->job;
+	p->job = NULL;
+
+	r = drm_sched_job_init(&job->base, entity, p->filp);
+	if (r)
+		goto error_unlock;
+
+	/* No memory allocation is allowed while holding the mn lock */
 	amdgpu_mn_lock(p->mn);
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = e->robj;
 
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			amdgpu_mn_unlock(p->mn);
-			return -ERESTARTSYS;
+			r = -ERESTARTSYS;
+			goto error_abort;
 		}
 	}
 
-	job = p->job;
-	p->job = NULL;
-
-	r = drm_sched_job_init(&job->base, entity, p->filp);
-	if (r) {
-		amdgpu_job_free(job);
-		amdgpu_mn_unlock(p->mn);
-		return r;
-	}
-
 	job->owner = p->filp;
 	p->fence = dma_fence_get(&job->base.s_fence->finished);
 
@@ -1243,6 +1241,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	amdgpu_mn_unlock(p->mn);
 
 	return 0;
+
+error_abort:
+	dma_fence_put(&job->base.s_fence->finished);
+	job->base.s_fence = NULL;
+
+error_unlock:
+	amdgpu_job_free(job);
+	amdgpu_mn_unlock(p->mn);
+	return r;
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
-- 
cgit 


From b995795bf09b6bb7847a2a9fc8e6b5b4ab0ce20c Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 30 Aug 2018 10:04:53 +0200
Subject: drm/amdgpu: fix "use bulk moves for efficient VM LRU handling" v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First step to fix the LRU corruption: we accidentally tried to move things
on the LRU after dropping the lock.

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dd734970e167..349dcc37ee64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1237,6 +1237,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	ring = to_amdgpu_ring(entity->rq->sched);
 	amdgpu_ring_priority_get(ring, priority);
 
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
@@ -1258,7 +1260,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
-	struct amdgpu_fpriv *fpriv;
 	int i, r;
 
 	if (!adev->accel_working)
@@ -1303,8 +1304,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	r = amdgpu_cs_submit(&parser, cs);
 
-	fpriv = filp->driver_priv;
-	amdgpu_vm_move_to_lru_tail(adev, &fpriv->vm);
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	return r;
-- 
cgit 


From 0a53b69cce846b42adf03ccee49ae0a37a731c20 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 3 Sep 2018 10:51:51 +0200
Subject: drm/amdgpu: fix amdgpu_mn_unlock() in the CS error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid unlocking a lock we never locked.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 349dcc37ee64..04a2733b5ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1247,10 +1247,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 error_abort:
 	dma_fence_put(&job->base.s_fence->finished);
 	job->base.s_fence = NULL;
+	amdgpu_mn_unlock(p->mn);
 
 error_unlock:
 	amdgpu_job_free(job);
-	amdgpu_mn_unlock(p->mn);
 	return r;
 }
 
-- 
cgit 


From ad9a5b78f585e9a9bd5ad06dfaf1269659a99f43 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 27 Aug 2018 18:22:31 +0200
Subject: drm/amdgpu: correctly sign extend 48bit addresses v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Correctly sign extend the GMC addresses to 48bit.

v2: sign extending turned out easier than thought.
v3: clean up the defines and move them into amdgpu_gmc.h as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    | 10 +++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h    | 26 ++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   |  6 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  7 ++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     | 13 -------------
 9 files changed, 44 insertions(+), 32 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8bee9a0a1dec..db9872f83d03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -135,7 +135,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
 			.gpuvm_size = min(adev->vm_manager.max_pfn
 					  << AMDGPU_GPU_PAGE_SHIFT,
-					  AMDGPU_VA_HOLE_START),
+					  AMDGPU_GMC_HOLE_START),
 			.drm_render_minor = adev->ddev->render->index
 		};
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 04a2733b5ccc..135d9d8c9506 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -835,7 +835,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 				continue;
 
-			va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK;
+			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
 			r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
 			if (r) {
 				DRM_ERROR("IB va_start is invalid\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 71792d820ae0..d30a0838851b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -572,16 +572,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->va_address >= AMDGPU_VA_HOLE_START &&
-	    args->va_address < AMDGPU_VA_HOLE_END) {
+	if (args->va_address >= AMDGPU_GMC_HOLE_START &&
+	    args->va_address < AMDGPU_GMC_HOLE_END) {
 		dev_dbg(&dev->pdev->dev,
 			"va_address 0x%LX is in VA hole 0x%LX-0x%LX\n",
-			args->va_address, AMDGPU_VA_HOLE_START,
-			AMDGPU_VA_HOLE_END);
+			args->va_address, AMDGPU_GMC_HOLE_START,
+			AMDGPU_GMC_HOLE_END);
 		return -EINVAL;
 	}
 
-	args->va_address &= AMDGPU_VA_HOLE_MASK;
+	args->va_address &= AMDGPU_GMC_HOLE_MASK;
 
 	if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) {
 		dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 588a62f7aebc..d84ef1634eb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -30,6 +30,19 @@
 
 #include "amdgpu_irq.h"
 
+/* VA hole for 48bit addresses on Vega10 */
+#define AMDGPU_GMC_HOLE_START	0x0000800000000000ULL
+#define AMDGPU_GMC_HOLE_END	0xffff800000000000ULL
+
+/*
+ * Hardware is programmed as if the hole doesn't exists with start and end
+ * address values.
+ *
+ * This mask is used to remove the upper 16bits of the VA and so come up with
+ * the linear addr value.
+ */
+#define AMDGPU_GMC_HOLE_MASK	0x0000ffffffffffffULL
+
 struct firmware;
 
 /*
@@ -133,6 +146,19 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
 	return (gmc->real_vram_size == gmc->visible_vram_size);
 }
 
+/**
+ * amdgpu_gmc_sign_extend - sign extend the given gmc address
+ *
+ * @addr: address to extend
+ */
+static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr)
+{
+	if (addr >= AMDGPU_GMC_HOLE_START)
+		addr |= AMDGPU_GMC_HOLE_END;
+
+	return addr;
+}
+
 void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
 			       uint64_t *addr, uint64_t *flags);
 uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index ad7978bab5fc..86e8772b6852 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -655,11 +655,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 
 		dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
 		dev_info.virtual_address_max =
-			min(vm_size, AMDGPU_VA_HOLE_START);
+			min(vm_size, AMDGPU_GMC_HOLE_START);
 
-		if (vm_size > AMDGPU_VA_HOLE_START) {
-			dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
-			dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
+		if (vm_size > AMDGPU_GMC_HOLE_START) {
+			dev_info.high_va_offset = AMDGPU_GMC_HOLE_END;
+			dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
 		}
 		dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
 		dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b5f20b42439e..0cbf651a88a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1368,7 +1368,7 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
 	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
 		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));
 
-	return bo->tbo.offset;
+	return amdgpu_gmc_sign_extend(bo->tbo.offset);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 38856365580d..f2f358aa0597 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -28,9 +28,7 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
 	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
 
 	addr -= AMDGPU_VA_RESERVED_SIZE;
-
-	if (addr >= AMDGPU_VA_HOLE_START)
-		addr |= AMDGPU_VA_HOLE_END;
+	addr = amdgpu_gmc_sign_extend(addr);
 
 	return addr;
 }
@@ -73,7 +71,7 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
 int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			  struct amdgpu_bo_va **bo_va)
 {
-	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
+	uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
 	struct ww_acquire_ctx ticket;
 	struct list_head list;
 	struct amdgpu_bo_list_entry pd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1f79a0ddc78a..3163351508cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -492,7 +492,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 		if (level == adev->vm_manager.root_level) {
 			ats_entries = amdgpu_vm_level_shift(adev, level);
 			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
-			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+			ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
 			ats_entries = min(ats_entries, entries);
 			entries -= ats_entries;
 		} else {
@@ -722,7 +722,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	eaddr = saddr + size - 1;
 
 	if (vm->pte_support_ats)
-		ats = saddr < AMDGPU_VA_HOLE_START;
+		ats = saddr < AMDGPU_GMC_HOLE_START;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
@@ -2016,7 +2016,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
+		if (vm->pte_support_ats &&
+		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
 		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6ea162ca296a..e275ee7c1bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -101,19 +101,6 @@ struct amdgpu_bo_list_entry;
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE			(1ULL << 20)
 
-/* VA hole for 48bit addresses on Vega10 */
-#define AMDGPU_VA_HOLE_START			0x0000800000000000ULL
-#define AMDGPU_VA_HOLE_END			0xffff800000000000ULL
-
-/*
- * Hardware is programmed as if the hole doesn't exists with start and end
- * address values.
- *
- * This mask is used to remove the upper 16bits of the VA and so come up with
- * the linear addr value.
- */
-#define AMDGPU_VA_HOLE_MASK			0x0000ffffffffffffULL
-
 /* max vmids dedicated for process */
 #define AMDGPU_VM_MAX_RESERVED_VMID	1
 
-- 
cgit 


From 7893499e3022542f6522847837487019ea83f142 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 10 Sep 2018 15:52:55 +0200
Subject: drm/amdgpu: fix error handling in amdgpu_cs_user_fence_chunk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Slowly leaking memory one page at a time :)

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 135d9d8c9506..c5cc648a1b4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -40,6 +40,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 {
 	struct drm_gem_object *gobj;
 	unsigned long size;
+	int r;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
@@ -51,20 +52,26 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
-	if (size != PAGE_SIZE || (data->offset + 8) > size)
-		return -EINVAL;
-
-	*offset = data->offset;
-
 	drm_gem_object_put_unlocked(gobj);
 
+	size = amdgpu_bo_size(p->uf_entry.robj);
+	if (size != PAGE_SIZE || (data->offset + 8) > size) {
+		r = -EINVAL;
+		goto error_unref;
+	}
+
 	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
-		amdgpu_bo_unref(&p->uf_entry.robj);
-		return -EINVAL;
+		r = -EINVAL;
+		goto error_unref;
 	}
 
+	*offset = data->offset;
+
 	return 0;
+
+error_unref:
+	amdgpu_bo_unref(&p->uf_entry.robj);
+	return r;
 }
 
 static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
-- 
cgit 


From 7e7bf8de432db3de912050856e641458de72a7b1 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou@amd.com>
Date: Tue, 11 Sep 2018 17:22:40 +0800
Subject: drm/amdgpu: move cs dependencies front a bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CS dependency handling doesn't need to be done under the VM reservation.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c5cc648a1b4e..1081fd00b059 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1285,6 +1285,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r)
 		goto out;
 
+	r = amdgpu_cs_dependencies(adev, &parser);
+	if (r) {
+		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+		goto out;
+	}
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1296,12 +1302,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	reserved_buffers = true;
 
-	r = amdgpu_cs_dependencies(adev, &parser);
-	if (r) {
-		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
-		goto out;
-	}
-
 	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);
 
-- 
cgit 


From e83dfe4d869358549bb259ab581ae4f0450c6580 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 10 Sep 2018 16:07:57 +0200
Subject: drm/amdgpu: remove amdgpu_bo_list_entry.robj (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can get that just by casting tv.bo.

v2: squash in kfd fix (Alex)

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c      | 42 +++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h      |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           | 58 ++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           |  3 +-
 5 files changed, 58 insertions(+), 48 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e7ceae05d517..6ee9dc476c86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -675,7 +675,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	if (!ctx->vm_pd)
 		return -ENOMEM;
 
-	ctx->kfd_bo.robj = bo;
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
@@ -740,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 			return -ENOMEM;
 	}
 
-	ctx->kfd_bo.robj = bo;
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index b80243d3972e..14d2982a47cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -49,8 +49,11 @@ static void amdgpu_bo_list_free(struct kref *ref)
 						   refcount);
 	struct amdgpu_bo_list_entry *e;
 
-	amdgpu_bo_list_for_each_entry(e, list)
-		amdgpu_bo_unref(&e->robj);
+	amdgpu_bo_list_for_each_entry(e, list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		amdgpu_bo_unref(&bo);
+	}
 
 	call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
 }
@@ -112,21 +115,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 			entry = &array[last_entry++];
 		}
 
-		entry->robj = bo;
 		entry->priority = min(info[i].bo_priority,
 				      AMDGPU_BO_LIST_MAX_PRIORITY);
-		entry->tv.bo = &entry->robj->tbo;
-		entry->tv.shared = !entry->robj->prime_shared_count;
-
-		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
-			list->gds_obj = entry->robj;
-		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
-			list->gws_obj = entry->robj;
-		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
-			list->oa_obj = entry->robj;
-
-		total_size += amdgpu_bo_size(entry->robj);
-		trace_amdgpu_bo_list_set(list, entry->robj);
+		entry->tv.bo = &bo->tbo;
+		entry->tv.shared = !bo->prime_shared_count;
+
+		if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
+			list->gds_obj = bo;
+		if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
+			list->gws_obj = bo;
+		if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
+			list->oa_obj = bo;
+
+		total_size += amdgpu_bo_size(bo);
+		trace_amdgpu_bo_list_set(list, bo);
 	}
 
 	list->first_userptr = first_userptr;
@@ -138,8 +140,11 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 	return 0;
 
 error_free:
-	while (i--)
-		amdgpu_bo_unref(&array[i].robj);
+	while (i--) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
+
+		amdgpu_bo_unref(&bo);
+	}
 	kvfree(list);
 	return r;
 
@@ -191,9 +196,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 	 * with the same priority, i.e. it must be stable.
 	 */
 	amdgpu_bo_list_for_each_entry(e, list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 		unsigned priority = e->priority;
 
-		if (!e->robj->parent)
+		if (!bo->parent)
 			list_add_tail(&e->tv.head, &bucket[priority]);
 
 		e->user_pages = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 61b089768e1c..7c5f5d1601e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -32,7 +32,6 @@ struct amdgpu_bo_va;
 struct amdgpu_fpriv;
 
 struct amdgpu_bo_list_entry {
-	struct amdgpu_bo		*robj;
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1081fd00b059..d762d78e5102 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -39,6 +39,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
+	struct amdgpu_bo *bo;
 	unsigned long size;
 	int r;
 
@@ -46,21 +47,21 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
-	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
+	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
-	size = amdgpu_bo_size(p->uf_entry.robj);
+	size = amdgpu_bo_size(bo);
 	if (size != PAGE_SIZE || (data->offset + 8) > size) {
 		r = -EINVAL;
 		goto error_unref;
 	}
 
-	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 		r = -EINVAL;
 		goto error_unref;
 	}
@@ -70,7 +71,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 
 error_unref:
-	amdgpu_bo_unref(&p->uf_entry.robj);
+	amdgpu_bo_unref(&bo);
 	return r;
 }
 
@@ -229,7 +230,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		goto free_all_kdata;
 	}
 
-	if (p->uf_entry.robj)
+	if (p->uf_entry.tv.bo)
 		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 
@@ -458,13 +459,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 	     p->evictable = list_prev_entry(p->evictable, tv.head)) {
 
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
-		struct amdgpu_bo *bo = candidate->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo);
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
-		if (candidate->robj == validated)
+		if (bo == validated)
 			break;
 
 		/* We can't move pinned BOs here */
@@ -529,7 +530,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		struct amdgpu_bo *bo = lobj->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
 		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
@@ -604,7 +605,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf_entry.robj && !p->uf_entry.robj->parent)
+	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	while (1) {
@@ -620,7 +621,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 		INIT_LIST_HEAD(&need_pages);
 		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = e->robj;
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
 				 &e->user_invalidated) && e->user_pages) {
@@ -639,7 +640,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 				list_del(&e->tv.head);
 				list_add(&e->tv.head, &need_pages);
 
-				amdgpu_bo_unreserve(e->robj);
+				amdgpu_bo_unreserve(bo);
 			}
 		}
 
@@ -658,7 +659,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 		/* Fill the page arrays for all userptrs. */
 		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->robj->tbo.ttm;
+			struct ttm_tt *ttm = e->tv.bo->ttm;
 
 			e->user_pages = kvmalloc_array(ttm->num_pages,
 							 sizeof(struct page*),
@@ -717,7 +718,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	oa = p->bo_list->oa_obj;
 
 	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
+		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
 
 	if (gds) {
 		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -732,8 +733,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		p->job->oa_size = amdgpu_bo_size(oa);
 	}
 
-	if (!r && p->uf_entry.robj) {
-		struct amdgpu_bo *uf = p->uf_entry.robj;
+	if (!r && p->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
 
 		r = amdgpu_ttm_alloc_gart(&uf->tbo);
 		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
@@ -749,8 +750,7 @@ error_free_pages:
 		if (!e->user_pages)
 			continue;
 
-		release_pages(e->user_pages,
-			      e->robj->tbo.ttm->num_pages);
+		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
 		kvfree(e->user_pages);
 	}
 
@@ -763,9 +763,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 	int r;
 
 	list_for_each_entry(e, &p->validated, tv.head) {
-		struct reservation_object *resv = e->robj->tbo.resv;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		struct reservation_object *resv = bo->tbo.resv;
+
 		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp,
-				     amdgpu_bo_explicit_sync(e->robj));
+				     amdgpu_bo_explicit_sync(bo));
 
 		if (r)
 			return r;
@@ -808,7 +810,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	kfree(parser->chunks);
 	if (parser->job)
 		amdgpu_job_free(parser->job);
-	amdgpu_bo_unref(&parser->uf_entry.robj);
+	if (parser->uf_entry.tv.bo) {
+		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+		amdgpu_bo_unref(&uf);
+	}
 }
 
 static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
@@ -919,7 +925,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		struct dma_fence *f;
 
 		/* ignore duplicates */
-		bo = e->robj;
+		bo = ttm_to_amdgpu_bo(e->tv.bo);
 		if (!bo)
 			continue;
 
@@ -958,11 +964,13 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
 		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
 			/* ignore duplicates */
-			if (!e->robj)
+			if (!bo)
 				continue;
 
-			amdgpu_vm_bo_invalidate(adev, e->robj, false);
+			amdgpu_vm_bo_invalidate(adev, bo, false);
 		}
 	}
 
@@ -1211,7 +1219,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	/* No memory allocation is allowed while holding the mn lock */
 	amdgpu_mn_lock(p->mn);
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = e->robj;
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
 			r = -ERESTARTSYS;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd5a0cdd67bc..234764ac58cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -593,9 +593,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry)
 {
-	entry->robj = vm->root.base.bo;
 	entry->priority = 0;
-	entry->tv.bo = &entry->robj->tbo;
+	entry->tv.bo = &vm->root.base.bo->tbo;
 	entry->tv.shared = true;
 	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
-- 
cgit 


From 77a2faa55c1a497f4e7e89eabd11830f0e3cb3dd Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 14 Sep 2018 16:06:31 +0200
Subject: drm/amdgpu: fix up GDS/GWS/OA shifting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That only worked by pure coincidence. Completely remove the shifting and
always apply the correct PAGE_SHIFT.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 12 ++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h    |  7 -------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    | 12 +++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 14 +++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  6 +++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 15 +++------------
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c      |  9 ---------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      |  9 ---------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 12 +-----------
 9 files changed, 25 insertions(+), 71 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d762d78e5102..8836186eb5ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -721,16 +721,16 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
 
 	if (gds) {
-		p->job->gds_base = amdgpu_bo_gpu_offset(gds);
-		p->job->gds_size = amdgpu_bo_size(gds);
+		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
+		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
 	}
 	if (gws) {
-		p->job->gws_base = amdgpu_bo_gpu_offset(gws);
-		p->job->gws_size = amdgpu_bo_size(gws);
+		p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
+		p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
 	}
 	if (oa) {
-		p->job->oa_base = amdgpu_bo_gpu_offset(oa);
-		p->job->oa_size = amdgpu_bo_size(oa);
+		p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
+		p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
 	}
 
 	if (!r && p->uf_entry.tv.bo) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index e73728d90388..ecbcefe49a98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -24,13 +24,6 @@
 #ifndef __AMDGPU_GDS_H__
 #define __AMDGPU_GDS_H__
 
-/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned,
- * we should report GDS/GWS/OA size as PAGE_SIZE aligned
- * */
-#define AMDGPU_GDS_SHIFT	2
-#define AMDGPU_GWS_SHIFT	PAGE_SHIFT
-#define AMDGPU_OA_SHIFT		PAGE_SHIFT
-
 struct amdgpu_ring;
 struct amdgpu_bo;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d30a0838851b..7b3d1ebda9df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 		}
 		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-		if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS)
-			size = size << AMDGPU_GDS_SHIFT;
-		else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS)
-			size = size << AMDGPU_GWS_SHIFT;
-		else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA)
-			size = size << AMDGPU_OA_SHIFT;
-		else
-			return -EINVAL;
+		/* GDS allocations must be DW aligned */
+		if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
+			size = ALIGN(size, 4);
 	}
-	size = roundup(size, PAGE_SIZE);
 
 	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
 		r = amdgpu_bo_reserve(vm->root.base.bo, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index dc4b2f34e3ea..a64056dadc58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -528,13 +528,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		struct drm_amdgpu_info_gds gds_info;
 
 		memset(&gds_info, 0, sizeof(gds_info));
-		gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT;
-		gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT;
-		gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT;
-		gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT;
-		gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT;
-		gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT;
-		gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT;
+		gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
+		gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
+		gds_info.gds_total_size = adev->gds.mem.total_size;
+		gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
+		gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
+		gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
+		gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
 		return copy_to_user(out, &gds_info,
 				    min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 113738cbb32c..904014dc5915 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -427,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	int r;
 
 	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
-	size = ALIGN(size, PAGE_SIZE);
+	if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
+			  AMDGPU_GEM_DOMAIN_OA))
+		size <<= PAGE_SHIFT;
+	else
+		size = ALIGN(size, PAGE_SIZE);
 
 	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d61910873627..0c4ab72474e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1845,19 +1845,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		 (unsigned)(gtt_size / (1024 * 1024)));
 
 	/* Initialize various on-chip memory pools */
-	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
-	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
-	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
-	adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT;
-	adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT;
-	adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT;
-	adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT;
-	adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT;
-	adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT;
 	/* GDS Memory */
 	if (adev->gds.mem.total_size) {
 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
-				   adev->gds.mem.total_size >> PAGE_SHIFT);
+				   adev->gds.mem.total_size);
 		if (r) {
 			DRM_ERROR("Failed initializing GDS heap.\n");
 			return r;
@@ -1867,7 +1858,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* GWS */
 	if (adev->gds.gws.total_size) {
 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
-				   adev->gds.gws.total_size >> PAGE_SHIFT);
+				   adev->gds.gws.total_size);
 		if (r) {
 			DRM_ERROR("Failed initializing gws heap.\n");
 			return r;
@@ -1877,7 +1868,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	/* OA */
 	if (adev->gds.oa.total_size) {
 		r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
-				   adev->gds.oa.total_size >> PAGE_SHIFT);
+				   adev->gds.oa.total_size);
 		if (r) {
 			DRM_ERROR("Failed initializing oa heap.\n");
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a15d9c0f233b..c0f9732cbaf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4170,15 +4170,6 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 					  uint32_t gws_base, uint32_t gws_size,
 					  uint32_t oa_base, uint32_t oa_size)
 {
-	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
-	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
-
-	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
-	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
-
-	oa_base = oa_base >> AMDGPU_OA_SHIFT;
-	oa_size = oa_size >> AMDGPU_OA_SHIFT;
-
 	/* GDS Base */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 11e6ccdfc3d1..96df23c99cfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5396,15 +5396,6 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 					  uint32_t gws_base, uint32_t gws_size,
 					  uint32_t oa_base, uint32_t oa_size)
 {
-	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
-	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
-
-	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
-	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
-
-	oa_base = oa_base >> AMDGPU_OA_SHIFT;
-	oa_size = oa_size >> AMDGPU_OA_SHIFT;
-
 	/* GDS Base */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1a298f17b7dc..528a8a567633 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1527,8 +1527,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
 			           (adev->gds.mem.total_size +
-				    adev->gfx.ngg.gds_reserve_size) >>
-				   AMDGPU_GDS_SHIFT);
+				    adev->gfx.ngg.gds_reserve_size));
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
@@ -3472,15 +3471,6 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
-	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
-
-	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
-	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
-
-	oa_base = oa_base >> AMDGPU_OA_SHIFT;
-	oa_size = oa_size >> AMDGPU_OA_SHIFT;
-
 	/* GDS Base */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
-- 
cgit