Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c	102
1 file changed, 58 insertions, 44 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8aaa427f8c0f..7689395e44fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,6 +35,7 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "nbio_v4_3.h"
+#include "nbio_v7_9.h"
 #include "atom.h"
 #include "amdgpu_reset.h"
 
@@ -757,16 +758,6 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_ras_check_feature_allowed(struct amdgpu_device *adev,
-		struct ras_common_if *head)
-{
-	if (amdgpu_ras_is_feature_allowed(adev, head) ||
-		amdgpu_ras_is_poison_mode_supported(adev))
-		return 1;
-	else
-		return 0;
-}
-
 /* wrapper of psp_ras_enable_features */
 int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 		struct ras_common_if *head, bool enable)
@@ -778,7 +769,16 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 	if (!con)
 		return -EINVAL;
 
-	if (head->block == AMDGPU_RAS_BLOCK__GFX) {
+	/* Do not enable ras feature if it is not allowed */
+	if (enable &&
+	    head->block != AMDGPU_RAS_BLOCK__GFX &&
+	    !amdgpu_ras_is_feature_allowed(adev, head))
+		goto out;
+
+	/* Only enable gfx ras feature from host side */
+	if (head->block == AMDGPU_RAS_BLOCK__GFX &&
+	    !amdgpu_sriov_vf(adev) &&
+	    !amdgpu_ras_intr_triggered()) {
 		info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
@@ -794,16 +794,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 				.error_type = amdgpu_ras_error_to_ta(head->type),
 			};
 		}
-	}
 
-	/* Do not enable if it is not allowed. */
-	if (enable && !amdgpu_ras_check_feature_allowed(adev, head))
-		goto out;
-
-	/* Only enable ras feature operation handle on host side */
-	if (head->block == AMDGPU_RAS_BLOCK__GFX &&
-		!amdgpu_sriov_vf(adev) &&
-		!amdgpu_ras_intr_triggered()) {
 		ret = psp_ras_enable_features(&adev->psp, info, enable);
 		if (ret) {
 			dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
@@ -1159,7 +1150,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	}
 
 	/* Calculate XGMI relative offset */
-	if (adev->gmc.xgmi.num_physical_nodes > 1) {
+	if (adev->gmc.xgmi.num_physical_nodes > 1 &&
+	    info->head.block != AMDGPU_RAS_BLOCK__GFX) {
 		block_info.address =
 			amdgpu_xgmi_get_relative_phy_addr(adev,
 							  block_info.address);
@@ -2072,6 +2064,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 			if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
 				ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
 				set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+				psp_fatal_error_recovery_quirk(&adev->psp);
 			}
 		}
 
@@ -2414,6 +2408,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
 	if (adev->asic_type == CHIP_IP_DISCOVERY) {
 		switch (adev->ip_versions[MP0_HWIP][0]) {
 		case IP_VERSION(13, 0, 0):
+		case IP_VERSION(13, 0, 6):
 		case IP_VERSION(13, 0, 10):
 			return true;
 		default:
@@ -2440,10 +2435,10 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
 	if (!ctx)
 		return;
 
-	if (strnstr(ctx->vbios_version, "D16406",
-		    sizeof(ctx->vbios_version)) ||
-		strnstr(ctx->vbios_version, "D36002",
-			sizeof(ctx->vbios_version)))
+	if (strnstr(ctx->vbios_pn, "D16406",
+		    sizeof(ctx->vbios_pn)) ||
+		strnstr(ctx->vbios_pn, "D36002",
+			sizeof(ctx->vbios_pn)))
 		adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
 }
 
@@ -2515,8 +2510,18 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 
 	/* hw_supported needs to be aligned with RAS block mask. */
 	adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
-	adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
-		adev->ras_hw_enabled & amdgpu_ras_mask;
+
+	/*
+	 * Disable ras feature for aqua vanjaram
+	 * by default on apu platform.
+	 */
+	if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6) &&
+	    adev->gmc.is_app_apu)
+		adev->ras_enabled = amdgpu_ras_enable != 1 ? 0 :
+			adev->ras_hw_enabled & amdgpu_ras_mask;
+	else
+		adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+			adev->ras_hw_enabled & amdgpu_ras_mask;
 }
 
 static void amdgpu_ras_counte_dw(struct work_struct *work)
@@ -2642,6 +2647,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 			 * check DF RAS */
 			adev->nbio.ras = &nbio_v4_3_ras;
 		break;
+	case IP_VERSION(7, 9, 0):
+		if (!adev->gmc.is_app_apu)
+			adev->nbio.ras = &nbio_v7_9_ras;
+		break;
 	default:
 		/* nbio ras is not available */
 		break;
@@ -2765,23 +2774,28 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
 			goto cleanup;
 	}
 
-	r = amdgpu_ras_sysfs_create(adev, ras_block);
-	if (r)
-		goto interrupt;
+	if (ras_obj->hw_ops &&
+	    (ras_obj->hw_ops->query_ras_error_count ||
+	     ras_obj->hw_ops->query_ras_error_status)) {
+		r = amdgpu_ras_sysfs_create(adev, ras_block);
+		if (r)
+			goto interrupt;
 
-	/* Those are the cached values at init.
-	 */
-	query_info = kzalloc(sizeof(struct ras_query_if), GFP_KERNEL);
-	if (!query_info)
-		return -ENOMEM;
-	memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
+		/* Those are the cached values at init.
+		 */
+		query_info = kzalloc(sizeof(*query_info), GFP_KERNEL);
+		if (!query_info)
+			return -ENOMEM;
+		memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if));
 
-	if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
-		atomic_set(&con->ras_ce_count, ce_count);
-		atomic_set(&con->ras_ue_count, ue_count);
+		if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) {
+			atomic_set(&con->ras_ce_count, ce_count);
+			atomic_set(&con->ras_ue_count, ue_count);
+		}
+
+		kfree(query_info);
 	}
-	kfree(query_info);
 
 	return 0;
 
 interrupt:
@@ -2958,10 +2972,6 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
 {
-	amdgpu_ras_check_supported(adev);
-	if (!adev->ras_hw_enabled)
-		return;
-
 	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
 		struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
@@ -3136,6 +3146,10 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 	 * that the ras block supports ras function.
 	 */
 	if (!ret &&
+	    (block == AMDGPU_RAS_BLOCK__GFX ||
+	     block == AMDGPU_RAS_BLOCK__SDMA ||
+	     block == AMDGPU_RAS_BLOCK__VCN ||
+	     block == AMDGPU_RAS_BLOCK__JPEG) &&
 	    amdgpu_ras_is_poison_mode_supported(adev) &&
 	    amdgpu_ras_get_ras_block(adev, block, 0))
 		ret = 1;