Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
 -rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c	394
 1 file changed, 313 insertions, 81 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 96a8fd0ca1df..08133de21fdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,7 +35,11 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "atom.h"
+#ifdef CONFIG_X86_MCE_AMD
+#include <asm/mce.h>
+static bool notifier_registered;
+#endif
 
 static const char *RAS_FS_NAME = "ras";
 
 const char *ras_error_string[] = {
@@ -61,8 +65,30 @@ const char *ras_block_string[] = {
 	"mp0",
 	"mp1",
 	"fuse",
+	"mca",
 };
 
+const char *ras_mca_block_string[] = {
+	"mca_mp0",
+	"mca_mp1",
+	"mca_mpio",
+	"mca_iohc",
+};
+
+const char *get_ras_block_str(struct ras_common_if *ras_block)
+{
+	if (!ras_block)
+		return "NULL";
+
+	if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT)
+		return "OUT OF RANGE";
+
+	if (ras_block->block == AMDGPU_RAS_BLOCK__MCA)
+		return ras_mca_block_string[ras_block->sub_block_index];
+
+	return ras_block_string[ras_block->block];
+}
+
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
@@ -85,6 +111,14 @@ static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
 				uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 				uint64_t addr);
+#ifdef CONFIG_X86_MCE_AMD
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
+struct mce_notifier_adev_list {
+	struct amdgpu_device *devs[MAX_GPU_INSTANCE];
+	int num_gpu;
+};
+static struct mce_notifier_adev_list mce_adev_list;
+#endif
 
 void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool ready)
 {
@@ -187,7 +221,7 @@ static int amdgpu_ras_find_block_id_by_name(const char *name, int *block_id)
 
 	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
 		*block_id = i;
-		if (strcmp(name, ras_block_str(i)) == 0)
+		if (strcmp(name, ras_block_string[i]) == 0)
 			return 0;
 	}
 	return -EINVAL;
@@ -509,7 +543,6 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 	if (amdgpu_ras_query_error_status(obj->adev, &info))
 		return -EINVAL;
-
 
 	if (obj->adev->asic_type == CHIP_ALDEBARAN) {
 		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
 			DRM_WARN("Failed to reset error counter and error status");
@@ -529,7 +562,7 @@ static inline void put_obj(struct ras_manager *obj)
 	if (obj && (--obj->use == 0))
 		list_del(&obj->node);
 	if (obj && (obj->use < 0))
-		DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", ras_block_str(obj->head.block));
+		DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head));
 }
 
 /* make one obj and return it. */
@@ -545,7 +578,14 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
 	if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
 		return NULL;
 
-	obj = &con->objs[head->block];
+	if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+		if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+			return NULL;
+
+		obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+	} else
+		obj = &con->objs[head->block];
+
 	/* already exist. return obj? */
 	if (alive_obj(obj))
 		return NULL;
@@ -573,19 +613,21 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
 		if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
 			return NULL;
 
-		obj = &con->objs[head->block];
+		if (head->block == AMDGPU_RAS_BLOCK__MCA) {
+			if (head->sub_block_index >= AMDGPU_RAS_MCA_BLOCK__LAST)
+				return NULL;
+
+			obj = &con->objs[AMDGPU_RAS_BLOCK__LAST + head->sub_block_index];
+		} else
+			obj = &con->objs[head->block];
 
-		if (alive_obj(obj)) {
-			WARN_ON(head->block != obj->head.block);
+		if (alive_obj(obj))
 			return obj;
-		}
 	} else {
-		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
+		for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT + AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
 			obj = &con->objs[i];
 
-			if (alive_obj(obj)) {
-				WARN_ON(i != obj->head.block);
+			if (alive_obj(obj))
 				return obj;
-			}
 		}
 	}
@@ -626,8 +668,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	 */
 	if (!amdgpu_ras_is_feature_allowed(adev, head))
 		return 0;
-	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
-		return 0;
 
 	if (enable) {
 		if (!obj) {
@@ -678,19 +718,14 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 
 	/* Do not enable if it is not allowed. */
 	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
-	/* Are we alerady in that state we are going to set? */
-	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) {
-		ret = 0;
-		goto out;
-	}
 
 	if (!amdgpu_ras_intr_triggered()) {
 		ret = psp_ras_enable_features(&adev->psp, info, enable);
 		if (ret) {
-			dev_err(adev->dev, "ras %s %s failed %d\n",
+			dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
 				enable ? "enable":"disable",
-				ras_block_str(head->block),
-				ret);
+				get_ras_block_str(head),
+				amdgpu_ras_is_poison_mode_supported(adev), ret);
 			goto out;
 		}
 	}
@@ -731,7 +766,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
 				if (!ret)
 					dev_info(adev->dev,
 						"RAS INFO: %s setup object\n",
-						ras_block_str(head->block));
+						get_ras_block_str(head));
 			}
 		} else {
 			/* setup the object then issue a ras TA disable cmd.*/
@@ -781,17 +816,39 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
 		bool bypass)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	int ras_block_count = AMDGPU_RAS_BLOCK_COUNT;
 	int i;
-	const enum amdgpu_ras_error_type default_ras_type =
-		AMDGPU_RAS_ERROR__NONE;
+	const enum amdgpu_ras_error_type default_ras_type = AMDGPU_RAS_ERROR__NONE;
 
-	for (i = 0; i < ras_block_count; i++) {
+	for (i = 0; i < AMDGPU_RAS_BLOCK_COUNT; i++) {
 		struct ras_common_if head = {
 			.block = i,
 			.type = default_ras_type,
 			.sub_block_index = 0,
 		};
+
+		if (i == AMDGPU_RAS_BLOCK__MCA)
+			continue;
+
+		if (bypass) {
+			/*
+			 * bypass psp. vbios enable ras for us.
+			 * so just create the obj
+			 */
+			if (__amdgpu_ras_feature_enable(adev, &head, 1))
+				break;
+		} else {
+			if (amdgpu_ras_feature_enable(adev, &head, 1))
+				break;
+		}
+	}
+
+	for (i = 0; i < AMDGPU_RAS_MCA_BLOCK_COUNT; i++) {
+		struct ras_common_if head = {
+			.block = AMDGPU_RAS_BLOCK__MCA,
+			.type = default_ras_type,
+			.sub_block_index = i,
+		};
+
 		if (bypass) {
 			/*
 			 * bypass psp. vbios enable ras for us.
@@ -809,6 +866,32 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
 }
 /* feature ctl end */
 
+
+void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
+				       struct ras_common_if *ras_block,
+				       struct ras_err_data  *err_data)
+{
+	switch (ras_block->sub_block_index) {
+	case AMDGPU_RAS_MCA_BLOCK__MP0:
+		if (adev->mca.mp0.ras_funcs &&
+		    adev->mca.mp0.ras_funcs->query_ras_error_count)
+			adev->mca.mp0.ras_funcs->query_ras_error_count(adev, &err_data);
+		break;
+	case AMDGPU_RAS_MCA_BLOCK__MP1:
+		if (adev->mca.mp1.ras_funcs &&
+		    adev->mca.mp1.ras_funcs->query_ras_error_count)
+			adev->mca.mp1.ras_funcs->query_ras_error_count(adev, &err_data);
+		break;
+	case AMDGPU_RAS_MCA_BLOCK__MPIO:
+		if (adev->mca.mpio.ras_funcs &&
+		    adev->mca.mpio.ras_funcs->query_ras_error_count)
+			adev->mca.mpio.ras_funcs->query_ras_error_count(adev, &err_data);
+		break;
+	default:
+		break;
+	}
+}
+
 /* query/inject/cure begin */
 int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 				  struct ras_query_if *info)
@@ -872,6 +955,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		    adev->hdp.ras_funcs->query_ras_error_count)
 			adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
 		break;
+	case AMDGPU_RAS_BLOCK__MCA:
+		amdgpu_ras_mca_query_error_status(adev, &info->head, &err_data);
+		break;
 	default:
 		break;
 	}
@@ -893,13 +979,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 					adev->smuio.funcs->get_socket_id(adev),
 					adev->smuio.funcs->get_die_id(adev),
 					obj->err_data.ce_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		} else {
 			dev_info(adev->dev, "%ld correctable hardware errors "
 					"detected in %s block, no user "
 					"action is needed.\n",
 					obj->err_data.ce_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		}
 	}
 	if (err_data.ue_count) {
@@ -912,15 +998,18 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 					adev->smuio.funcs->get_socket_id(adev),
 					adev->smuio.funcs->get_die_id(adev),
 					obj->err_data.ue_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		} else {
 			dev_info(adev->dev, "%ld uncorrectable hardware errors "
 					"detected in %s block\n",
 					obj->err_data.ue_count,
-					ras_block_str(info->head.block));
+					get_ras_block_str(&info->head));
 		}
 	}
 
+	if (!amdgpu_persistent_edc_harvesting_supported(adev))
+		amdgpu_ras_reset_error_status(adev, info->head.block);
+
 	return 0;
 }
 
@@ -1027,6 +1116,7 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	case AMDGPU_RAS_BLOCK__SDMA:
 	case AMDGPU_RAS_BLOCK__MMHUB:
 	case AMDGPU_RAS_BLOCK__PCIE_BIF:
+	case AMDGPU_RAS_BLOCK__MCA:
 		ret = psp_ras_trigger_error(&adev->psp, &block_info);
 		break;
 	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
@@ -1034,13 +1124,13 @@
 		break;
 	default:
 		dev_info(adev->dev, "%s error injection is not supported yet\n",
-			 ras_block_str(info->head.block));
+			 get_ras_block_str(&info->head));
 		ret = -EINVAL;
 	}
 
 	if (ret)
 		dev_err(adev->dev, "ras inject %s failed %d\n",
-			ras_block_str(info->head.block), ret);
+			get_ras_block_str(&info->head), ret);
 
 	return ret;
 }
@@ -1383,7 +1473,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
 		if (amdgpu_ras_is_supported(adev, obj->head.block) &&
 			(obj->attr_inuse == 1)) {
 			sprintf(fs_info.debugfs_name, "%s_err_inject",
-					ras_block_str(obj->head.block));
+					get_ras_block_str(&obj->head));
 			fs_info.head = obj->head;
 			amdgpu_ras_debugfs_create(adev, &fs_info, dir);
 		}
@@ -1469,22 +1559,28 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
 		data->rptr = (data->aligned_element_size +
 				data->rptr) % data->ring_size;
 
-		/* Let IP handle its data, maybe we need get the output
-		 * from the callback to udpate the error type/count, etc
-		 */
 		if (data->cb) {
-			ret = data->cb(obj->adev, &err_data, &entry);
-			/* ue will trigger an interrupt, and in that case
-			 * we need do a reset to recovery the whole system.
-			 * But leave IP do that recovery, here we just dispatch
-			 * the error.
-			 */
-			if (ret == AMDGPU_RAS_SUCCESS) {
-				/* these counts could be left as 0 if
-				 * some blocks do not count error number
+			if (amdgpu_ras_is_poison_mode_supported(obj->adev) &&
+			    obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+				dev_info(obj->adev->dev,
+						"Poison is created, no user action is needed.\n");
+			else {
+				/* Let IP handle its data, maybe we need get the output
+				 * from the callback to udpate the error type/count, etc
+				 */
+				ret = data->cb(obj->adev, &err_data, &entry);
+				/* ue will trigger an interrupt, and in that case
+				 * we need do a reset to recovery the whole system.
+				 * But leave IP do that recovery, here we just dispatch
+				 * the error.
 				 */
-				obj->err_data.ue_count += err_data.ue_count;
-				obj->err_data.ce_count += err_data.ce_count;
+				if (ret == AMDGPU_RAS_SUCCESS) {
+					/* these counts could be left as 0 if
+					 * some blocks do not count error number
+					 */
+					obj->err_data.ue_count += err_data.ue_count;
+					obj->err_data.ce_count += err_data.ce_count;
+				}
 			}
 		}
 	}
@@ -2014,6 +2110,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 			adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs);
 	}
 
+#ifdef CONFIG_X86_MCE_AMD
+	if ((adev->asic_type == CHIP_ALDEBARAN) &&
+	    (adev->gmc.xgmi.connected_to_cpu))
+		amdgpu_register_bad_pages_mca_notifier(adev);
+#endif
 	return 0;
 
 free:
@@ -2056,19 +2157,6 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 }
 /* recovery end */
 
-/* return 0 if ras will reset gpu and repost.*/
-int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
-		unsigned int block)
-{
-	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
-	if (!ras)
-		return -EINVAL;
-
-	ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
-	return 0;
-}
-
 static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
 {
 	return adev->asic_type == CHIP_VEGA10 ||
@@ -2176,12 +2264,14 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	int r;
+	bool df_poison, umc_poison;
 
 	if (con)
 		return 0;
 
 	con = kmalloc(sizeof(struct amdgpu_ras) +
-			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT,
+			sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
+			sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
			GFP_KERNEL|__GFP_ZERO);
 	if (!con)
 		return -ENOMEM;
@@ -2245,6 +2335,23 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 			goto release_con;
 	}
 
+	/* Init poison supported flag, the default value is false */
+	if (adev->df.funcs &&
+	    adev->df.funcs->query_ras_poison_mode &&
+	    adev->umc.ras_funcs &&
+	    adev->umc.ras_funcs->query_ras_poison_mode) {
+		df_poison =
+			adev->df.funcs->query_ras_poison_mode(adev);
+		umc_poison =
+			adev->umc.ras_funcs->query_ras_poison_mode(adev);
+		/* Only poison is set in both DF and UMC, we can support it */
+		if (df_poison && umc_poison)
+			con->poison_supported = true;
+		else if (df_poison != umc_poison)
+			dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+					df_poison, umc_poison);
+	}
+
 	if (amdgpu_ras_fs_init(adev)) {
 		r = -EINVAL;
 		goto release_con;
@@ -2288,6 +2395,16 @@ static int amdgpu_persistent_edc_harvesting(struct amdgpu_device *adev,
 	return 0;
 }
 
+bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev)
+{
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+       if (!con)
+               return false;
+
+       return con->poison_supported;
+}
+
 /* helper function to handle common stuff in ip late init phase */
 int amdgpu_ras_late_init(struct amdgpu_device *adev,
 			 struct ras_common_if *ras_block,
@@ -2306,12 +2423,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
 
 	r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
 	if (r) {
-		if (r == -EAGAIN) {
-			/* request gpu reset. will run again */
-			amdgpu_ras_request_reset_on_boot(adev,
-					ras_block->block);
-			return 0;
-		} else if (adev->in_suspend || amdgpu_in_reset(adev)) {
+		if (adev->in_suspend || amdgpu_in_reset(adev)) {
 			/* in resume phase, if fail to enable ras,
 			 * clean up all ras fs nodes, and disable ras */
 			goto cleanup;
@@ -2403,19 +2515,6 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
 			}
 		}
 	}
-
-	if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
-		con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
-		/* setup ras obj state as disabled.
-		 * for init_by_vbios case.
-		 * if we want to enable ras, just enable it in a normal way.
-		 * If we want do disable it, need setup ras obj as enabled,
-		 * then issue another TA disable cmd.
-		 * See feature_enable_on_boot
-		 */
-		amdgpu_ras_disable_all_features(adev, 1);
-		amdgpu_ras_reset_gpu(adev);
-	}
 }
 
 void amdgpu_ras_suspend(struct amdgpu_device *adev)
@@ -2507,3 +2606,136 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev)
 		kfree(con);
 	}
 }
+
+#ifdef CONFIG_X86_MCE_AMD
+static struct amdgpu_device *find_adev(uint32_t node_id)
+{
+	int i;
+	struct amdgpu_device *adev = NULL;
+
+	for (i = 0; i < mce_adev_list.num_gpu; i++) {
+		adev = mce_adev_list.devs[i];
+
+		if (adev && adev->gmc.xgmi.connected_to_cpu &&
+		    adev->gmc.xgmi.physical_node_id == node_id)
+			break;
+		adev = NULL;
+	}
+
+	return adev;
+}
+
+#define GET_MCA_IPID_GPUID(m)	(((m) >> 44) & 0xF)
+#define GET_UMC_INST(m)		(((m) >> 21) & 0x7)
+#define GET_CHAN_INDEX(m)	((((m) >> 12) & 0x3) | (((m) >> 18) & 0x4))
+#define GPU_ID_OFFSET		8
+
+static int amdgpu_bad_page_notifier(struct notifier_block *nb,
+				    unsigned long val, void *data)
+{
+	struct mce *m = (struct mce *)data;
+	struct amdgpu_device *adev = NULL;
+	uint32_t gpu_id = 0;
+	uint32_t umc_inst = 0;
+	uint32_t ch_inst, channel_index = 0;
+	struct ras_err_data err_data = {0, 0, 0, NULL};
+	struct eeprom_table_record err_rec;
+	uint64_t retired_page;
+
+	/*
+	 * If the error was generated in UMC_V2, which belongs to GPU UMCs,
+	 * and error occurred in DramECC (Extended error code = 0) then only
+	 * process the error, else bail out.
+	 */
+	if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) &&
+		    (XEC(m->status, 0x3f) == 0x0)))
+		return NOTIFY_DONE;
+
+	/*
+	 * If it is correctable error, return.
+	 */
+	if (mce_is_correctable(m))
+		return NOTIFY_OK;
+
+	/*
+	 * GPU Id is offset by GPU_ID_OFFSET in MCA_IPID_UMC register.
+	 */
+	gpu_id = GET_MCA_IPID_GPUID(m->ipid) - GPU_ID_OFFSET;
+
+	adev = find_adev(gpu_id);
+	if (!adev) {
+		DRM_WARN("%s: Unable to find adev for gpu_id: %d\n", __func__,
+								gpu_id);
+		return NOTIFY_DONE;
+	}
+
+	/*
+	 * If it is uncorrectable error, then find out UMC instance and
+	 * channel index.
+	 */
+	umc_inst = GET_UMC_INST(m->ipid);
+	ch_inst = GET_CHAN_INDEX(m->ipid);
+
+	dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
+			     umc_inst, ch_inst);
+
+	memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
+
+	/*
+	 * Translate UMC channel address to Physical address
+	 */
+	channel_index =
+		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num
+					  + ch_inst];
+
+	retired_page = ADDR_OF_8KB_BLOCK(m->addr) |
+			ADDR_OF_256B_BLOCK(channel_index) |
+			OFFSET_IN_256B_BLOCK(m->addr);
+
+	err_rec.address = m->addr;
+	err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+	err_rec.ts = (uint64_t)ktime_get_real_seconds();
+	err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+	err_rec.cu = 0;
+	err_rec.mem_channel = channel_index;
+	err_rec.mcumc_id = umc_inst;
+
+	err_data.err_addr = &err_rec;
+	err_data.err_addr_cnt = 1;
+
+	if (amdgpu_bad_page_threshold != 0) {
+		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+						err_data.err_addr_cnt);
+		amdgpu_ras_save_bad_pages(adev);
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block amdgpu_bad_page_nb = {
+	.notifier_call  = amdgpu_bad_page_notifier,
+	.priority       = MCE_PRIO_UC,
+};
+
+static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
+{
+	/*
+	 * Add the adev to the mce_adev_list.
+	 * During mode2 reset, amdgpu device is temporarily
	 * removed from the mgpu_info list which can cause
+	 * page retirement to fail.
+	 * Use this list instead of mgpu_info to find the amdgpu
+	 * device on which the UMC error was reported.
+	 */
+	mce_adev_list.devs[mce_adev_list.num_gpu++] = adev;

+	/*
+	 * Register the x86 notifier only once
+	 * with MCE subsystem.
+	 */
+	if (notifier_registered == false) {
+		mce_register_decode_chain(&amdgpu_bad_page_nb);
+		notifier_registered = true;
+	}
+}
+#endif