aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Graham Sider <Graham.Sider@amd.com> 2023-02-06 14:04:42 -0500
committer: Alex Deucher <alexander.deucher@amd.com> 2023-06-09 09:59:03 -0400
commit: 895797d9193b38e759bc01268a8e3887e521f682 (patch)
tree: 73797e8e8d06b8cbe881acb0ecd593d5ff85a805
parent: 2e8cc5d317d12f7fb4f66361a3ce5427f0abe2cd (diff)
drm/amdgpu/bu: Add use_mtype_cc_wa module param
Set use_mtype_cc_wa to 1 by default so that the PTE coherence flag uses MTYPE_CC instead of MTYPE_RW. This is required for the time being to mitigate a bug causing XCCs to hit stale data due to TCC marking fully dirty lines as exclusive.

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7
4 files changed, 20 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cb9373f8c25a..cd2a29a7e26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -212,6 +212,7 @@ extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p;
+extern bool amdgpu_use_mtype_cc_wa;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index da4e50aef95a..8bc37826a99f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -823,6 +823,13 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm
#endif
/**
+ * DOC: use_mtype_cc_wa (bool)
+ */
+bool amdgpu_use_mtype_cc_wa = true;
+MODULE_PARM_DESC(use_mtype_cc_wa, "Use MTYPE_CC workaround (0 = use MTYPE_RW where applicable, 1 = use MTYPE_CC where applicable (default))");
+module_param_named(use_mtype_cc_wa, amdgpu_use_mtype_cc_wa, bool, 0444);
+
+/**
* DOC: pcie_p2p (bool)
* Enable PCIe P2P (requires large-BAR). Default value: true (on)
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2eb67b53e497..8623b93c05ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1187,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
unsigned int mtype;
+ unsigned int mtype_default;
bool snoop = false;
switch (adev->ip_versions[GC_HWIP][0]) {
@@ -1230,7 +1231,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
/* FIXME: Needs more work for handling multiple memory
* partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU
* modes.
+ * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
+ * To force use of MTYPE_RW, set use_mtype_cc_wa=0
*/
+ mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
snoop = true;
if (uncached) {
mtype = MTYPE_UC;
@@ -1245,14 +1249,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
* socket should be treated as remote access so MTYPE_RW
* cannot be used always.
*/
- mtype = MTYPE_RW;
+ mtype = mtype_default;
} else if (adev->flags & AMD_IS_APU) {
/* APU on carve out mode */
- mtype = MTYPE_RW;
+ mtype = mtype_default;
} else {
/* dGPU */
if (is_vram && bo_adev == adev)
- mtype = MTYPE_RW;
+ mtype = mtype_default;
else if (is_vram)
mtype = MTYPE_NC;
else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2b2129dd1e4a..477ef9294203 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1198,9 +1198,12 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (uncached) {
mapping_flags |= AMDGPU_VM_MTYPE_UC;
} else if (domain == SVM_RANGE_VRAM_DOMAIN) {
- /* local HBM region close to partition */
+ /* local HBM region close to partition
+ * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
+ * To force use of MTYPE_RW, set use_mtype_cc_wa=0
+ */
if (bo_node == node)
- mapping_flags |= AMDGPU_VM_MTYPE_RW;
+ mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
/* local HBM region far from partition or remote XGMI GPU */
else if (svm_nodes_in_same_hive(bo_node, node))
mapping_flags |= AMDGPU_VM_MTYPE_NC;