aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-04-30 14:42:54 +1000
committerDave Airlie <airlied@redhat.com>2024-04-30 14:43:00 +1000
commit4a56c0ed5aa0bcbe1f5f7d755fb1fe1ebf48ae9c (patch)
treeec7d1f08d654ef4b6e596c891eadb504df2f611e /drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
parent68b89e23c2282877b0d411e07a3ef90490d6fe30 (diff)
parentb77bef36015c501f1e0f51db72c55e6dcd8bdd48 (diff)
Merge tag 'amd-drm-next-6.10-2024-04-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.10-2024-04-26: amdgpu: - Misc code cleanups and refactors - Support setting reset method at runtime - Report OD status - SMU 14.0.1 fixes - SDMA 4.4.2 fixes - VPE fixes - MES fixes - Update BO eviction priorities - UMSCH fixes - Reset fixes - Freesync fixes - GFXIP 9.4.3 fixes - SDMA 5.2 fixes - MES UAF fix - RAS updates - Devcoredump updates for dumping IP state - DSC fixes - JPEG fix - Fix VRAM memory accounting - VCN 5.0 fixes - MES fixes - UMC 12.0 updates - Modify contiguous flags handling - Initial support for mapping kernel queues via MES amdkfd: - Fix rescheduling of restore worker - VRAM accounting for SVM migrations - mGPU fix - Enable SQ watchpoint for gfx10 Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240426221245.1613332-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h19
1 files changed, 18 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 563b0249247e..5f50c69c3cec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -52,6 +52,8 @@
#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
+/* Page retirement tag */
+#define UMC_ECC_NEW_DETECTED_TAG 0x1
typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
uint32_t umc_inst, uint32_t ch_inst, void *data);
@@ -66,6 +68,8 @@ struct amdgpu_umc_ras {
void *ras_error_status);
bool (*check_ecc_err_status)(struct amdgpu_device *adev,
enum amdgpu_mca_error_type type, void *ras_error_status);
+ int (*update_ecc_status)(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
};
struct amdgpu_umc_funcs {
@@ -102,10 +106,13 @@ int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint32_t reset);
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
-void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
@@ -122,4 +129,14 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
uint32_t reset, uint32_t timeout_ms);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev,
+ uint64_t *pfns, int len, uint64_t *val);
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err);
+
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status);
#endif