diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 22 | 
1 files changed, 19 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index b2667342cf67..6b8d7bb83bb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -31,6 +31,10 @@
 #include "ta_ras_if.h"
 #include "amdgpu_ras_eeprom.h"
 
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		(0x1 << 0)
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET		(0x1 << 1)
+#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV	(0x1 << 2)
+
 enum amdgpu_ras_block {
 	AMDGPU_RAS_BLOCK__UMC = 0,
 	AMDGPU_RAS_BLOCK__SDMA,
@@ -336,6 +340,12 @@ struct amdgpu_ras {
 	struct amdgpu_ras_eeprom_control eeprom_control;
 
 	bool error_query_ready;
+
+	/* bad page count threshold */
+	uint32_t bad_page_cnt_threshold;
+
+	/* disable ras error count harvest in recovery */
+	bool disable_ras_err_cnt_harvest;
 };
 
 struct ras_fs_data {
@@ -490,6 +500,8 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev);
 unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		bool is_ce);
 
+bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev);
+
 /* error handling functions */
 int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 		struct eeprom_table_record *bps, int pages);
@@ -500,10 +512,14 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-	/* save bad page to eeprom before gpu reset,
-	 * i2c may be unstable in gpu reset
+	/*
+	 * Save bad page to eeprom before gpu reset, i2c may be unstable
+	 * in gpu reset.
+	 *
+	 * Also, exclude the case when ras recovery issuer is
+	 * eeprom page write itself.
 	 */
-	if (in_task())
+	if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
 		amdgpu_ras_reserve_bad_pages(adev);
 
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)