Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 67 ++++++++++++++++++++++++++++++++++++-------------------------------
1 file changed, 36 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 0c7275bca8f7..f5411b798e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -93,7 +93,7 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 		timeout -= 5;
 	} while (timeout > 1);
 
-	pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
+	dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
 
 	return -ETIME;
 }
@@ -111,7 +111,7 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 		timeout -= 10;
 	} while (timeout > 1);
 
-	pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+	dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);
 
 	return -ETIME;
 }
@@ -132,7 +132,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
 		xgpu_ai_mailbox_set_valid(adev, false);
 		trn = xgpu_ai_peek_ack(adev);
 		if (trn) {
-			pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+			dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! wait again !\n", trn);
 			msleep(1);
 		}
 	} while(trn);
@@ -155,7 +155,7 @@ static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
 	/* start to poll ack */
 	r = xgpu_ai_poll_ack(adev);
 	if (r)
-		pr_err("Doesn't get ack from pf, continue\n");
+		dev_err(adev->dev, "Doesn't get ack from pf, continue\n");
 
 	xgpu_ai_mailbox_set_valid(adev, false);
 }
@@ -173,7 +173,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 		req == IDH_REQ_GPU_RESET_ACCESS) {
 		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
 		if (r) {
-			pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
+			dev_err(adev->dev, "Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
 			return r;
 		}
 		/* Retrieve checksum from mailbox2 */
@@ -231,7 +231,7 @@ static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
 					struct amdgpu_irq_src *source,
 					struct amdgpu_iv_entry *entry)
 {
-	DRM_DEBUG("get ack intr and do nothing.\n");
+	dev_dbg(adev->dev, "get ack intr and do nothing.\n");
 	return 0;
 }
 
@@ -249,38 +249,33 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
 	return 0;
 }
 
-static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+static void xgpu_ai_ready_to_reset(struct amdgpu_device *adev)
 {
-	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
-	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
-	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
-
-	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
-	 * otherwise the mailbox msg will be ruined/reseted by
-	 * the VF FLR.
-	 */
-	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
-		return;
-
-	down_write(&adev->reset_domain->sem);
-
-	amdgpu_virt_fini_data_exchange(adev);
-
 	xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
 
+static int xgpu_ai_wait_reset(struct amdgpu_device *adev)
+{
+	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
 	do {
-		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
-			goto flr_done;
-
+		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) {
+			dev_dbg(adev->dev, "Got AI IDH_FLR_NOTIFICATION_CMPL after %d ms\n", AI_MAILBOX_POLL_FLR_TIMEDOUT - timeout);
+			return 0;
+		}
 		msleep(10);
 		timeout -= 10;
 	} while (timeout > 1);
 
-	dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n");
+	dev_dbg(adev->dev, "waiting AI IDH_FLR_NOTIFICATION_CMPL timeout\n");
+	return -ETIME;
+}
+
+static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+{
+	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
 
-flr_done:
-	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
-	up_write(&adev->reset_domain->sem);
+	amdgpu_virt_fini_data_exchange(adev);
 
 	/* Trigger recovery for world switch failure if no TDR */
 	if (amdgpu_device_should_recover_gpu(adev)
@@ -292,6 +287,7 @@ flr_done:
 		reset_context.method = AMD_RESET_METHOD_NONE;
 		reset_context.reset_req_dev = adev;
 		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+		set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
 
 		amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 	}
@@ -319,7 +315,7 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
 
 	switch (event) {
 		case IDH_FLR_NOTIFICATION:
-		if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
+		if (amdgpu_sriov_runtime(adev))
 			WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
 								&adev->virt.flr_work),
 				  "Failed to queue work! at %s",
@@ -412,12 +408,21 @@ static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev,
 	xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
 }
 
+static bool xgpu_ai_rcvd_ras_intr(struct amdgpu_device *adev)
+{
+	enum idh_event msg = xgpu_ai_mailbox_peek_msg(adev);
+
+	return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF);
+}
+
 const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
 	.req_full_gpu	= xgpu_ai_request_full_gpu_access,
 	.rel_full_gpu	= xgpu_ai_release_full_gpu_access,
 	.reset_gpu = xgpu_ai_request_reset,
-	.wait_reset = NULL,
+	.ready_to_reset = xgpu_ai_ready_to_reset,
+	.wait_reset = xgpu_ai_wait_reset,
 	.trans_msg = xgpu_ai_mailbox_trans_msg,
 	.req_init_data  = xgpu_ai_request_init_data,
 	.ras_poison_handler = xgpu_ai_ras_poison_handler,
+	.rcvd_ras_intr = xgpu_ai_rcvd_ras_intr,
 };
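The net effect of the refactor above is that the per-ASIC FLR worker no longer serializes the reset itself: xgpu_ai_ready_to_reset() only sends IDH_READY_TO_RESET, xgpu_ai_wait_reset() only polls for IDH_FLR_NOTIFICATION_CMPL, and the locking that used to live here (the in_gpu_reset cmpxchg and reset_domain->sem) is expected to move into the common reset path, which drives the new ready_to_reset/wait_reset entries in amdgpu_virt_ops. A minimal sketch of how such a caller might use these callbacks follows; the helper name example_host_flr() and the exact placement of the locking are assumptions for illustration, not code from this patch:

/* Hypothetical caller in the shared SR-IOV reset path; only the
 * callbacks and the reset_domain fields are taken from the patch itself.
 */
static int example_host_flr(struct amdgpu_device *adev)
{
	struct amdgpu_virt *virt = &adev->virt;

	/* Serialize against concurrent resets (previously done inside
	 * xgpu_ai_mailbox_flr_work() itself).
	 */
	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
		return -EBUSY;
	down_write(&adev->reset_domain->sem);

	/* Tell the host the guest is ready for the VF FLR ... */
	if (virt->ops->ready_to_reset)
		virt->ops->ready_to_reset(adev);

	/* ... then block until the host reports FLR completion. */
	if (virt->ops->wait_reset)
		virt->ops->wait_reset(adev);

	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
	up_write(&adev->reset_domain->sem);
	return 0;
}

Two smaller changes ride along: the pr_err()/DRM_DEBUG() calls become dev_err()/dev_dbg() so messages are tagged with the specific device (useful when several VFs log on one host), and the new .rcvd_ras_intr callback also treats a mailbox value of 0xFFFFFFFF as a RAS interrupt, since an all-ones read typically means the register space has become inaccessible.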