From b152e8e13e883656c0012504c451d483579aa842 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Wed, 8 May 2019 19:31:06 +0800
Subject: drm/amdgpu: Revert "drm/amdgpu: skip gpu reset when ras error
 occured"

Enable this now to reset the GPU on RAS errors.

This reverts commit 138352e5752aa3e694951d70c8fe8730219f4edf.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index eaef5edefc34..e60a554656ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -187,13 +187,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
 static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
 		bool is_baco)
 {
-	/* remove me when gpu reset works on vega20 A1. */
-#if 0
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
 		schedule_work(&ras->recovery_work);
-#endif
 	return 0;
 }
 
-- 
cgit 


From a564808e7f5b19b3621a1dc4ff2a3042171ae167 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Wed, 8 May 2019 19:12:24 +0800
Subject: drm/amdgpu: handle ras reset

add another flag to allow IP do a gpu reset after device init.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 36 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  3 +++
 2 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 5f8e1163a75d..37cb3de08494 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -118,7 +118,8 @@ const char *ras_block_string[] = {
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 #define ras_block_str(i) (ras_block_string[i])
 
-#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		1
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET		2
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
 
 static void amdgpu_ras_self_test(struct amdgpu_device *adev)
@@ -1358,6 +1359,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 }
 /* recovery end */
 
+/* return 0 if ras will reset gpu and repost.*/
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+		unsigned int block)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (!ras)
+		return -EINVAL;
+
+	ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+	return 0;
+}
+
 /*
  * check hardware's ras ability which will be saved in hw_supported.
  * if hardware does not support ras, we can skip some ras initializtion and
@@ -1433,7 +1447,12 @@ recovery_out:
 	return -EINVAL;
 }
 
-/* do some init work after IP late init as dependence */
+/* do some init work after IP late init as dependence.
+ * TODO
+ * gpu reset will re-enable ras, need fint out one way to run it again.
+ * for now, if a gpu reset happened, unless IP enable its ras, the ras state
+ * will be showed as disabled.
+ */
 void amdgpu_ras_post_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1462,6 +1481,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
 			}
 		}
 	}
+
+	if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
+		con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+		/* setup ras obj state as disabled.
+		 * for init_by_vbios case.
+		 * if we want to enable ras, just enable it in a normal way.
+		 * If we want do disable it, need setup ras obj as enabled,
+		 * then issue another TA disable cmd.
+		 * See feature_enable_on_boot
+		 */
+		amdgpu_ras_disable_all_features(adev, 1);
+		amdgpu_ras_reset_gpu(adev, 0);
+	}
 }
 
 /* do some fini work before IP fini as dependence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index e60a554656ca..06ef325b61b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -175,6 +175,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 	return ras && (ras->supported & (1 << block));
 }
 
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+		unsigned int block);
+
 int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		bool is_ce);
 
-- 
cgit 


From 466b179346094e01deccd051a215fe782b59ca68 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Tue, 7 May 2019 11:53:31 +0800
Subject: drm/amdgpu: add badpages sysfs interafce

add badpages node.
it will output badpages list in format
gpu pfn : gpu page size : flags

example
0x00000000 : 0x00001000 : R
0x00000001 : 0x00001000 : R
0x00000002 : 0x00001000 : R
0x00000003 : 0x00001000 : R
0x00000004 : 0x00001000 : R
0x00000005 : 0x00001000 : R
0x00000006 : 0x00001000 : R
0x00000007 : 0x00001000 : P
0x00000008 : 0x00001000 : P
0x00000009 : 0x00001000 : P

flags can be one of below characters
R: reserved.
P: pending for reserve.
F: failed to reserve for some reasons.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 146 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |   1 +
 2 files changed, 147 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 37cb3de08494..49c71cfc7fc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -90,6 +90,12 @@ struct ras_manager {
 	struct ras_err_data err_data;
 };
 
+struct ras_badpage {
+	unsigned int bp;
+	unsigned int size;
+	unsigned int flags;
+};
+
 const char *ras_error_string[] = {
 	"none",
 	"parity",
@@ -710,6 +716,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 
 /* sysfs begin */
 
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+		struct ras_badpage **bps, unsigned int *count);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+	switch (flags) {
+	case 0:
+		return "R";
+	case 1:
+		return "P";
+	case 2:
+	default:
+		return "F";
+	};
+}
+
+/*
+ * DOC: ras sysfs gpu_vram_bad_pages interface
+ *
+ * It allows user to read the bad pages of vram on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
+ *
+ * It outputs multiple lines, and each line stands for one gpu page.
+ *
+ * The format of one line is below,
+ * gpu pfn : gpu page size : flags
+ *
+ * gpu pfn and gpu page size are printed in hex format.
+ * flags can be one of below character,
+ * R: reserved, this gpu page is reserved and not able to use.
+ * P: pending for reserve, this gpu page is marked as bad, will be reserved
+ *    in next window of page_reserve.
+ * F: unable to reserve. this gpu page can't be reserved due to some reasons.
+ *
+ * examples:
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+		struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t ppos, size_t count)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, badpages_attr);
+	struct amdgpu_device *adev = con->adev;
+	const unsigned int element_size =
+		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+	unsigned int start = (ppos + element_size - 1) / element_size;
+	unsigned int end = (ppos + count - 1) / element_size;
+	ssize_t s = 0;
+	struct ras_badpage *bps = NULL;
+	unsigned int bps_count = 0;
+
+	memset(buf, 0, count);
+
+	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+		return 0;
+
+	for (; start < end && start < bps_count; start++)
+		s += scnprintf(&buf[s], element_size + 1,
+				"0x%08x : 0x%08x : %1s\n",
+				bps[start].bp,
+				bps[start].size,
+				amdgpu_ras_badpage_flags_str(bps[start].flags));
+
+	kfree(bps);
+
+	return s;
+}
+
 static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -750,9 +827,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		NULL
 	};
+	struct bin_attribute *bin_attrs[] = {
+		&con->badpages_attr,
+		NULL
+	};
 	struct attribute_group group = {
 		.name = "ras",
 		.attrs = attrs,
+		.bin_attrs = bin_attrs,
 	};
 
 	con->features_attr = (struct device_attribute) {
@@ -762,7 +844,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
 		},
 			.show = amdgpu_ras_sysfs_features_read,
 	};
+
+	con->badpages_attr = (struct bin_attribute) {
+		.attr = {
+			.name = "gpu_vram_bad_pages",
+			.mode = S_IRUGO,
+		},
+		.size = 0,
+		.private = NULL,
+		.read = amdgpu_ras_sysfs_badpages_read,
+	};
+
 	sysfs_attr_init(attrs[0]);
+	sysfs_bin_attr_init(bin_attrs[0]);
 
 	return sysfs_create_group(&adev->dev->kobj, &group);
 }
@@ -774,9 +868,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		NULL
 	};
+	struct bin_attribute *bin_attrs[] = {
+		&con->badpages_attr,
+		NULL
+	};
 	struct attribute_group group = {
 		.name = "ras",
 		.attrs = attrs,
+		.bin_attrs = bin_attrs,
 	};
 
 	sysfs_remove_group(&adev->dev->kobj, &group);
@@ -1108,6 +1207,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
 /* ih end */
 
 /* recovery begin */
+
+/* return 0 on success.
+ * caller need free bps.
+ */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+		struct ras_badpage **bps, unsigned int *count)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_err_handler_data *data;
+	int i = 0;
+	int ret = 0;
+
+	if (!con || !con->eh_data || !bps || !count)
+		return -EINVAL;
+
+	mutex_lock(&con->recovery_lock);
+	data = con->eh_data;
+	if (!data || data->count == 0) {
+		*bps = NULL;
+		goto out;
+	}
+
+	*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+	if (!*bps) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (; i < data->count; i++) {
+		(*bps)[i] = (struct ras_badpage){
+			.bp = data->bps[i].bp,
+			.size = AMDGPU_GPU_PAGE_SIZE,
+			.flags = 0,
+		};
+
+		if (data->last_reserved <= i)
+			(*bps)[i].flags = 1;
+		else if (data->bps[i].bo == NULL)
+			(*bps)[i].flags = 2;
+	}
+
+	*count = data->count;
+out:
+	mutex_unlock(&con->recovery_lock);
+	return ret;
+}
+
 static void amdgpu_ras_do_recovery(struct work_struct *work)
 {
 	struct amdgpu_ras *ras =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 06ef325b61b8..59994ee00855 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -93,6 +93,7 @@ struct amdgpu_ras {
 	struct dentry *ent;
 	/* sysfs */
 	struct device_attribute features_attr;
+	struct bin_attribute badpages_attr;
 	/* block array */
 	struct ras_manager *objs;
 
-- 
cgit 


From 511fdbc33aaa4758f7c445183ff840e251c0b427 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Thu, 9 May 2019 08:26:27 +0800
Subject: drm/amdgpu: ras support suspend/resume

add ras suspend function. rename ras_post_init to amdgpu_ras_resume.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Tested-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c    | 20 +++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h    |  4 +++-
 3 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 309461d0c275..da120fe330be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2745,7 +2745,7 @@ fence_driver_init:
 	}
 
 	/* must succeed. */
-	amdgpu_ras_post_init(adev);
+	amdgpu_ras_resume(adev);
 
 	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
 	if (r) {
@@ -3503,7 +3503,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 					goto out;
 
 				/* must succeed. */
-				amdgpu_ras_post_init(tmp_adev);
+				amdgpu_ras_resume(tmp_adev);
 
 				/* Update PSP FW topology after reset */
 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 49c71cfc7fc6..da1dc40b9b14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1594,12 +1594,9 @@ recovery_out:
 }
 
 /* do some init work after IP late init as dependence.
- * TODO
- * gpu reset will re-enable ras, need fint out one way to run it again.
- * for now, if a gpu reset happened, unless IP enable its ras, the ras state
- * will be showed as disabled.
+ * and it runs in resume/gpu reset/booting up cases.
  */
-void amdgpu_ras_post_init(struct amdgpu_device *adev)
+void amdgpu_ras_resume(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj, *tmp;
@@ -1642,6 +1639,19 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
 	}
 }
 
+void amdgpu_ras_suspend(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return;
+
+	amdgpu_ras_disable_all_features(adev, 0);
+	/* Make sure all ras objects are disabled. */
+	if (con->features)
+		amdgpu_ras_disable_all_features(adev, 1);
+}
+
 /* do some fini work before IP fini as dependence */
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 59994ee00855..c6b34fbd695f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -179,6 +179,9 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
 		unsigned int block);
 
+void amdgpu_ras_resume(struct amdgpu_device *adev);
+void amdgpu_ras_suspend(struct amdgpu_device *adev);
+
 int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		bool is_ce);
 
@@ -256,7 +259,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
 
 /* called in ip_init and ip_fini */
 int amdgpu_ras_init(struct amdgpu_device *adev);
-void amdgpu_ras_post_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
 
-- 
cgit 


From 8252562d5270c13e6123a5939d0db1bbf04327a1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 8 Jun 2019 12:23:57 +0300
Subject: drm/amdgpu: Fix bounds checking in amdgpu_ras_is_supported()

The "block" variable can be set by the user through debugfs, so it can
be quite large which leads to shift wrapping here.  This means we report
a "block" as supported when it's not, and that leads to array overflows
later on.

This bug is not really a security issue in real life, because debugfs is
generally root only.

Fixes: 36ea1bd2d084 ("drm/amdgpu: add debugfs ctrl node")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index c6b34fbd695f..94c652f5265a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -173,6 +173,8 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
+	if (block >= AMDGPU_RAS_BLOCK_COUNT)
+		return 0;
 	return ras && (ras->supported & (1 << block));
 }
 
-- 
cgit 


From 450f30ea9c60228e87cc60647473e2c1eb55df0b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 13 Jun 2019 15:19:19 +0200
Subject: amdgpu: no need to check return value of debugfs_create functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: xinhui pan <xinhui.pan@amd.com>
Cc: Evan Quan <evan.quan@amd.com>
Cc: Feifei Xu <Feifei.Xu@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 51 ++++++++-------------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h |  4 +--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  5 +---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  5 +---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  |  5 +---
 5 files changed, 17 insertions(+), 53 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 011630f62f85..7efc4e07665d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -967,40 +967,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
 /* sysfs end */
 
 /* debugfs begin */
-static int amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct drm_minor *minor = adev->ddev->primary;
-	struct dentry *root = minor->debugfs_root, *dir;
-	struct dentry *ent;
-
-	dir = debugfs_create_dir("ras", root);
-	if (IS_ERR(dir))
-		return -EINVAL;
 
-	con->dir = dir;
-
-	ent = debugfs_create_file("ras_ctrl",
-			S_IWUGO | S_IRUGO, con->dir,
-			adev, &amdgpu_ras_debugfs_ctrl_ops);
-	if (IS_ERR(ent)) {
-		debugfs_remove(con->dir);
-		return -EINVAL;
-	}
-
-	con->ent = ent;
-	return 0;
+	con->dir = debugfs_create_dir("ras", minor->debugfs_root);
+	con->ent = debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, con->dir,
+				       adev, &amdgpu_ras_debugfs_ctrl_ops);
 }
 
-int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
 		struct ras_fs_if *head)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
-	struct dentry *ent;
 
 	if (!obj || obj->ent)
-		return -EINVAL;
+		return;
 
 	get_obj(obj);
 
@@ -1008,34 +992,25 @@ int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
 			head->debugfs_name,
 			sizeof(obj->fs_data.debugfs_name));
 
-	ent = debugfs_create_file(obj->fs_data.debugfs_name,
-			S_IWUGO | S_IRUGO, con->dir,
-			obj, &amdgpu_ras_debugfs_ops);
-
-	if (IS_ERR(ent))
-		return -EINVAL;
-
-	obj->ent = ent;
-
-	return 0;
+	obj->ent = debugfs_create_file(obj->fs_data.debugfs_name,
+				       S_IWUGO | S_IRUGO, con->dir, obj,
+				       &amdgpu_ras_debugfs_ops);
 }
 
-int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
 		struct ras_common_if *head)
 {
 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
 
 	if (!obj || !obj->ent)
-		return 0;
+		return;
 
 	debugfs_remove(obj->ent);
 	obj->ent = NULL;
 	put_obj(obj);
-
-	return 0;
 }
 
-static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
+static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj, *tmp;
@@ -1048,8 +1023,6 @@ static int amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
 	debugfs_remove(con->dir);
 	con->dir = NULL;
 	con->ent = NULL;
-
-	return 0;
 }
 /* debugfs end */
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 94c652f5265a..b2841195bd3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -276,10 +276,10 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
 int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
 		struct ras_common_if *head);
 
-int amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
 		struct ras_fs_if *head);
 
-int amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
+void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
 		struct ras_common_if *head);
 
 int amdgpu_ras_error_query(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ba36a28da2fa..a3f1490f7307 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3904,9 +3904,7 @@ static int gfx_v9_0_ecc_late_init(void *handle)
 	if (r)
 		goto interrupt;
 
-	r = amdgpu_ras_debugfs_create(adev, &fs_info);
-	if (r)
-		goto debugfs;
+	amdgpu_ras_debugfs_create(adev, &fs_info);
 
 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
 	if (r)
@@ -3921,7 +3919,6 @@ irq:
 	amdgpu_ras_sysfs_remove(adev, *ras_if);
 sysfs:
 	amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
 interrupt:
 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 602593bab7a7..3c23de4b2e48 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -730,9 +730,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
 	if (r)
 		goto interrupt;
 
-	r = amdgpu_ras_debugfs_create(adev, &fs_info);
-	if (r)
-		goto debugfs;
+	amdgpu_ras_debugfs_create(adev, &fs_info);
 
 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
 	if (r)
@@ -747,7 +745,6 @@ irq:
 	amdgpu_ras_sysfs_remove(adev, *ras_if);
 sysfs:
 	amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
 interrupt:
 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 7a259c5b6c62..c0b6011b4bd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1566,9 +1566,7 @@ static int sdma_v4_0_late_init(void *handle)
 	if (r)
 		goto interrupt;
 
-	r = amdgpu_ras_debugfs_create(adev, &fs_info);
-	if (r)
-		goto debugfs;
+	amdgpu_ras_debugfs_create(adev, &fs_info);
 
 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
 	if (r)
@@ -1589,7 +1587,6 @@ irq:
 	amdgpu_ras_sysfs_remove(adev, *ras_if);
 sysfs:
 	amdgpu_ras_debugfs_remove(adev, *ras_if);
-debugfs:
 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
 interrupt:
 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
-- 
cgit