diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 73 | 
1 file changed, 41 insertions, 32 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index aba00d961627..f47bd7ada4d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -91,7 +91,7 @@ static int xgpu_nv_poll_ack(struct amdgpu_device *adev)  		timeout -= 5;  	} while (timeout > 1); -	pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", NV_MAILBOX_POLL_ACK_TIMEDOUT); +	dev_err(adev->dev, "Doesn't get TRN_MSG_ACK from pf in %d msec \n", NV_MAILBOX_POLL_ACK_TIMEDOUT);  	return -ETIME;  } @@ -106,13 +106,16 @@ static int xgpu_nv_poll_msg(struct amdgpu_device *adev, enum idh_event event)  	do {  		r = xgpu_nv_mailbox_rcv_msg(adev, event); -		if (!r) +		if (!r) { +			dev_dbg(adev->dev, "rcv_msg 0x%x after %llu ms\n", event, NV_MAILBOX_POLL_MSG_TIMEDOUT - timeout + now);  			return 0; +		}  		msleep(10);  		now = (uint64_t)ktime_to_ms(ktime_get());  	} while (timeout > now); +	dev_dbg(adev->dev, "nv_poll_msg timed out\n");  	return -ETIME;  } @@ -133,11 +136,12 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,  		xgpu_nv_mailbox_set_valid(adev, false);  		trn = xgpu_nv_peek_ack(adev);  		if (trn) { -			pr_err("trn=%x ACK should not assert! wait again !\n", trn); +			dev_err_ratelimited(adev->dev, "trn=%x ACK should not assert! 
wait again !\n", trn);  			msleep(1);  		}  	} while (trn); +	dev_dbg(adev->dev, "trans_msg req = 0x%x, data1 = 0x%x\n", req, data1);  	WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW0, req);  	WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW1, data1);  	WREG32_NO_KIQ(mmMAILBOX_MSGBUF_TRN_DW2, data2); @@ -147,7 +151,7 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev,  	/* start to poll ack */  	r = xgpu_nv_poll_ack(adev);  	if (r) -		pr_err("Doesn't get ack from pf, continue\n"); +		dev_err(adev->dev, "Doesn't get ack from pf, continue\n");  	xgpu_nv_mailbox_set_valid(adev, false);  } @@ -181,11 +185,11 @@ send_request:  	if (event != -1) {  		r = xgpu_nv_poll_msg(adev, event);  		if (r) { -			if (retry++ < 2) +			if (retry++ < 5)  				goto send_request;  			if (req != IDH_REQ_GPU_INIT_DATA) { -				pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); +				dev_err(adev->dev, "Doesn't get msg:%d from pf, error=%d\n", event, r);  				return r;  			} else /* host doesn't support REQ_GPU_INIT_DATA handshake */  				adev->virt.req_init_data_ver = 0; @@ -261,7 +265,7 @@ static int xgpu_nv_mailbox_ack_irq(struct amdgpu_device *adev,  					struct amdgpu_irq_src *source,  					struct amdgpu_iv_entry *entry)  { -	DRM_DEBUG("get ack intr and do nothing.\n"); +	dev_dbg(adev->dev, "get ack intr and do nothing.\n");  	return 0;  } @@ -282,38 +286,33 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,  	return 0;  } -static void xgpu_nv_mailbox_flr_work(struct work_struct *work) +static void xgpu_nv_ready_to_reset(struct amdgpu_device *adev)  { -	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); -	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); -	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT; - -	/* block amdgpu_gpu_recover till msg FLR COMPLETE received, -	 * otherwise the mailbox msg will be ruined/reseted by -	 * the VF FLR. 
-	 */ -	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0) -		return; - -	down_write(&adev->reset_domain->sem); - -	amdgpu_virt_fini_data_exchange(adev); -  	xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0); +} +static int xgpu_nv_wait_reset(struct amdgpu_device *adev) +{ +	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;  	do { -		if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) -			goto flr_done; - +		if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL) { +			dev_dbg(adev->dev, "Got NV IDH_FLR_NOTIFICATION_CMPL after %d ms\n", NV_MAILBOX_POLL_FLR_TIMEDOUT - timeout); +			return 0; +		}  		msleep(10);  		timeout -= 10;  	} while (timeout > 1); -	dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n"); +	dev_dbg(adev->dev, "waiting NV IDH_FLR_NOTIFICATION_CMPL timeout\n"); +	return -ETIME; +} + +static void xgpu_nv_mailbox_flr_work(struct work_struct *work) +{ +	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work); +	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt); -flr_done: -	atomic_set(&adev->reset_domain->in_gpu_reset, 0); -	up_write(&adev->reset_domain->sem); +	amdgpu_virt_fini_data_exchange(adev);  	/* Trigger recovery for world switch failure if no TDR */  	if (amdgpu_device_should_recover_gpu(adev) @@ -328,6 +327,7 @@ flr_done:  		reset_context.method = AMD_RESET_METHOD_NONE;  		reset_context.reset_req_dev = adev;  		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); +		set_bit(AMDGPU_HOST_FLR, &reset_context.flags);  		amdgpu_device_gpu_recover(adev, NULL, &reset_context);  	} @@ -358,7 +358,7 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,  	switch (event) {  	case IDH_FLR_NOTIFICATION: -		if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) +		if (amdgpu_sriov_runtime(adev))  			WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,  				   &adev->virt.flr_work),  				  "Failed to queue work! 
at %s", @@ -449,12 +449,21 @@ static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev,  	}  } +static bool xgpu_nv_rcvd_ras_intr(struct amdgpu_device *adev) +{ +	enum idh_event msg = xgpu_nv_mailbox_peek_msg(adev); + +	return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF); +} +  const struct amdgpu_virt_ops xgpu_nv_virt_ops = {  	.req_full_gpu	= xgpu_nv_request_full_gpu_access,  	.rel_full_gpu	= xgpu_nv_release_full_gpu_access,  	.req_init_data  = xgpu_nv_request_init_data,  	.reset_gpu = xgpu_nv_request_reset, -	.wait_reset = NULL, +	.ready_to_reset = xgpu_nv_ready_to_reset, +	.wait_reset = xgpu_nv_wait_reset,  	.trans_msg = xgpu_nv_mailbox_trans_msg,  	.ras_poison_handler = xgpu_nv_ras_poison_handler, +	.rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,  }; |