Diffstat (limited to 'drivers/gpu')
37 files changed, 436 insertions, 274 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2992a49ad4a5..8ac1581a6b53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -945,6 +945,7 @@ struct amdgpu_device {
 	/* s3/s4 mask */
 	bool                            in_suspend;
+	bool				in_hibernate;
 
 	/* record last mm index being written through WREG32*/
 	unsigned long last_mm_index;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9dff792c9290..6a5b91d23fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1343,7 +1343,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	}
 
 	/* Free the BO*/
-	amdgpu_bo_unref(&mem->bo);
+	drm_gem_object_put_unlocked(&mem->bo->tbo.base);
 	mutex_destroy(&mem->lock);
 	kfree(mem);
 
@@ -1688,7 +1688,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
 		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
 
-	(*mem)->bo = amdgpu_bo_ref(bo);
+	drm_gem_object_get(&bo->tbo.base);
+	(*mem)->bo = bo;
 	(*mem)->va = va;
 	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
 		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 466bfe541e45..a735d79a717b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1181,7 +1181,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
 	struct amdgpu_device *adev = drm_dev->dev_private;
 	int r;
 
+	adev->in_hibernate = true;
 	r = amdgpu_device_suspend(drm_dev, true);
+	adev->in_hibernate = false;
 	if (r)
 		return r;
 	return amdgpu_asic_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 9ae7b61f696a..25ddb482466a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	u32 cpp;
 	u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 			       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS     |
-			       AMDGPU_GEM_CREATE_VRAM_CLEARED 	     |
-			       AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+			       AMDGPU_GEM_CREATE_VRAM_CLEARED;
 
 	info = drm_get_format_info(adev->ddev, mode_cmd);
 	cpp = info->cpp[0];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f92c158d89a1..0e0daf0021b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4273,7 +4273,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
 		/* ===  CGCG /CGLS for GFX 3D Only === */
 		gfx_v10_0_update_3d_clock_gating(adev, enable);
 		/* ===  MGCG + MGLS === */
-		/* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */
+		gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
 	}
 
 	if (adev->cg_flags &
@@ -4353,11 +4353,7 @@ static int gfx_v10_0_set_powergating_state(void *handle,
 	switch (adev->asic_type) {
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
-		if (!enable) {
-			amdgpu_gfx_off_ctrl(adev, false);
-			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
-		} else
-			amdgpu_gfx_off_ctrl(adev, true);
+		amdgpu_gfx_off_ctrl(adev, enable);
 		break;
 	default:
 		break;
@@ -4918,6 +4914,19 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
 							   ref, mask);
 }
 
+static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring,
+					 unsigned vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	uint32_t value = 0;
+
+	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
+	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
+	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
+	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
+	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5309,6 +5318,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+	.soft_recovery = gfx_v10_0_ring_soft_recovery,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0c390485bc10..d2d9dce68c2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1236,6 +1236,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
+	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
+	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
 	{ 0, 0, 0, 0, 0 },
 };
 
@@ -5025,10 +5027,9 @@ static int gfx_v9_0_set_powergating_state(void *handle,
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
 	case CHIP_RENOIR:
-		if (!enable) {
+		if (!enable)
 			amdgpu_gfx_off_ctrl(adev, false);
-			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
-		}
+
 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
@@ -5052,12 +5053,7 @@ static int gfx_v9_0_set_powergating_state(void *handle,
 			amdgpu_gfx_off_ctrl(adev, true);
 		break;
 	case CHIP_VEGA12:
-		if (!enable) {
-			amdgpu_gfx_off_ctrl(adev, false);
-			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
-		} else {
-			amdgpu_gfx_off_ctrl(adev, true);
-		}
+		amdgpu_gfx_off_ctrl(adev, enable);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 9c83c1303f08..28e651b173ab 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params)
 
 /**
  * dm_crtc_high_irq() - Handles CRTC interrupt
- * @interrupt_params: ignored
+ * @interrupt_params: used for determining the CRTC instance
  *
  * Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK
  * event handler.
@@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params)
 	unsigned long flags;
 
 	acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
-	if (acrtc) {
-		acrtc_state = to_dm_crtc_state(acrtc->base.state);
-
-		DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n",
-			      acrtc->crtc_id,
-			      amdgpu_dm_vrr_active(acrtc_state));
-
-		/* Core vblank handling at start of front-porch is only possible
-		 * in non-vrr mode, as only there vblank timestamping will give
-		 * valid results while done in front-porch. Otherwise defer it
-		 * to dm_vupdate_high_irq after end of front-porch.
-		 */
-		if (!amdgpu_dm_vrr_active(acrtc_state))
-			drm_crtc_handle_vblank(&acrtc->base);
-
-		/* Following stuff must happen at start of vblank, for crc
-		 * computation and below-the-range btr support in vrr mode.
-		 */
-		amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
-
-		if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI &&
-		    acrtc_state->vrr_params.supported &&
-		    acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
-			spin_lock_irqsave(&adev->ddev->event_lock, flags);
-			mod_freesync_handle_v_update(
-				adev->dm.freesync_module,
-				acrtc_state->stream,
-				&acrtc_state->vrr_params);
-
-			dc_stream_adjust_vmin_vmax(
-				adev->dm.dc,
-				acrtc_state->stream,
-				&acrtc_state->vrr_params.adjust);
-			spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
-		}
-	}
-}
-
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-/**
- * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs
- * @interrupt params - interrupt parameters
- *
- * Notify DRM's vblank event handler at VSTARTUP
- *
- * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which:
- * * We are close enough to VUPDATE - the point of no return for hw
- * * We are in the fixed portion of variable front porch when vrr is enabled
- * * We are before VUPDATE, where double-buffered vrr registers are swapped
- *
- * It is therefore the correct place to signal vblank, send user flip events,
- * and update VRR.
- */
-static void dm_dcn_crtc_high_irq(void *interrupt_params)
-{
-	struct common_irq_params *irq_params = interrupt_params;
-	struct amdgpu_device *adev = irq_params->adev;
-	struct amdgpu_crtc *acrtc;
-	struct dm_crtc_state *acrtc_state;
-	unsigned long flags;
-
-	acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
-
 	if (!acrtc)
 		return;
 
@@ -528,22 +464,35 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
 			 amdgpu_dm_vrr_active(acrtc_state),
 			 acrtc_state->active_planes);
 
+	/**
+	 * Core vblank handling at start of front-porch is only possible
+	 * in non-vrr mode, as only there vblank timestamping will give
+	 * valid results while done in front-porch. Otherwise defer it
+	 * to dm_vupdate_high_irq after end of front-porch.
+	 */
+	if (!amdgpu_dm_vrr_active(acrtc_state))
+		drm_crtc_handle_vblank(&acrtc->base);
+
+	/**
+	 * Following stuff must happen at start of vblank, for crc
+	 * computation and below-the-range btr support in vrr mode.
+	 */
 	amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
-	drm_crtc_handle_vblank(&acrtc->base);
+
+	/* BTR updates need to happen before VUPDATE on Vega and above. */
+	if (adev->family < AMDGPU_FAMILY_AI)
+		return;
 
 	spin_lock_irqsave(&adev->ddev->event_lock, flags);
-	if (acrtc_state->vrr_params.supported &&
+	if (acrtc_state->stream && acrtc_state->vrr_params.supported &&
 	    acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
-		mod_freesync_handle_v_update(
-		adev->dm.freesync_module,
-		acrtc_state->stream,
-		&acrtc_state->vrr_params);
+		mod_freesync_handle_v_update(adev->dm.freesync_module,
+					     acrtc_state->stream,
+					     &acrtc_state->vrr_params);
 
-		dc_stream_adjust_vmin_vmax(
-			adev->dm.dc,
-			acrtc_state->stream,
-			&acrtc_state->vrr_params.adjust);
+		dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc_state->stream,
+					   &acrtc_state->vrr_params.adjust);
 	}
 
 	/*
@@ -556,7 +505,8 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
 	 * avoid race conditions between flip programming and completion,
 	 * which could cause too early flip completion events.
 	 */
-	if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
+	if (adev->family >= AMDGPU_FAMILY_RV &&
+	    acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED &&
 	    acrtc_state->active_planes == 0) {
 		if (acrtc->event) {
 			drm_crtc_send_vblank_event(&acrtc->base, acrtc->event);
@@ -568,7 +518,6 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params)
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 }
-#endif
 
 static int dm_set_clockgating_state(void *handle,
 		  enum amd_clockgating_state state)
@@ -2445,8 +2394,36 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev)
 		c_irq_params->adev = adev;
 		c_irq_params->irq_src = int_params.irq_source;
 
+		amdgpu_dm_irq_register_interrupt(
+			adev, &int_params, dm_crtc_high_irq, c_irq_params);
+	}
+
+	/* Use VUPDATE_NO_LOCK interrupt on DCN, which seems to correspond to
+	 * the regular VUPDATE interrupt on DCE. We want DC_IRQ_SOURCE_VUPDATEx
+	 * to trigger at end of each vblank, regardless of state of the lock,
+	 * matching DCE behaviour.
+	 */
+	for (i = DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT;
+	     i <= DCN_1_0__SRCID__OTG0_IHC_V_UPDATE_NO_LOCK_INTERRUPT + adev->mode_info.num_crtc - 1;
+	     i++) {
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->vupdate_irq);
+
+		if (r) {
+			DRM_ERROR("Failed to add vupdate irq id!\n");
+			return r;
+		}
+
+		int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
+		int_params.irq_source =
+			dc_interrupt_to_irq_source(dc, i, 0);
+
+		c_irq_params = &adev->dm.vupdate_params[int_params.irq_source - DC_IRQ_SOURCE_VUPDATE1];
+
+		c_irq_params->adev = adev;
+		c_irq_params->irq_src = int_params.irq_source;
+
 		amdgpu_dm_irq_register_interrupt(adev, &int_params,
-				dm_dcn_crtc_high_irq, c_irq_params);
+				dm_vupdate_high_irq, c_irq_params);
 	}
 
 	/* Use GRPH_PFLIP interrupt */
@@ -4453,10 +4430,6 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
 	struct amdgpu_device *adev = crtc->dev->dev_private;
 	int rc;
 
-	/* Do not set vupdate for DCN hardware */
-	if (adev->family > AMDGPU_FAMILY_AI)
-		return 0;
-
 	irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
 
 	rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
@@ -7882,6 +7855,7 @@ static int dm_update_plane_state(struct dc *dc,
 	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
 	struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state;
 	struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state;
+	struct amdgpu_crtc *new_acrtc;
 	bool needs_reset;
 	int ret = 0;
 
@@ -7891,9 +7865,30 @@ static int dm_update_plane_state(struct dc *dc,
 	dm_new_plane_state = to_dm_plane_state(new_plane_state);
 	dm_old_plane_state = to_dm_plane_state(old_plane_state);
 
-	/*TODO Implement atomic check for cursor plane */
-	if (plane->type == DRM_PLANE_TYPE_CURSOR)
+	/*TODO Implement better atomic check for cursor plane */
+	if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+		if (!enable || !new_plane_crtc ||
+			drm_atomic_plane_disabling(plane->state, new_plane_state))
+			return 0;
+
+		new_acrtc = to_amdgpu_crtc(new_plane_crtc);
+
+		if ((new_plane_state->crtc_w > new_acrtc->max_cursor_width) ||
+			(new_plane_state->crtc_h > new_acrtc->max_cursor_height)) {
+			DRM_DEBUG_ATOMIC("Bad cursor size %d x %d\n",
+							 new_plane_state->crtc_w, new_plane_state->crtc_h);
+			return -EINVAL;
+		}
+
+		if (new_plane_state->crtc_x <= -new_acrtc->max_cursor_width ||
+			new_plane_state->crtc_y <= -new_acrtc->max_cursor_height) {
+			DRM_DEBUG_ATOMIC("Bad cursor position %d, %d\n",
+							 new_plane_state->crtc_x, new_plane_state->crtc_y);
+			return -EINVAL;
+		}
+
 		return 0;
+	}
 
 	needs_reset = should_reset_plane(state, plane, old_plane_state,
 					 new_plane_state);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 78e1c11d4ae5..dcf84a61de37 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -398,15 +398,15 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
 	struct mod_hdcp_display *display = &hdcp_work[link_index].display;
 	struct mod_hdcp_link *link = &hdcp_work[link_index].link;
 
-	memset(display, 0, sizeof(*display));
-	memset(link, 0, sizeof(*link));
-
-	display->index = aconnector->base.index;
-
 	if (config->dpms_off) {
 		hdcp_remove_display(hdcp_work, link_index, aconnector);
 		return;
 	}
+
+	memset(display, 0, sizeof(*display));
+	memset(link, 0, sizeof(*link));
+
+	display->index = aconnector->base.index;
 	display->state = MOD_HDCP_DISPLAY_ACTIVE;
 
 	if (aconnector->dc_sink != NULL)
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index e4e5a53b2b4e..8e2acb4df860 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -319,12 +319,12 @@ static void pp_dpm_en_umd_pstate(struct pp_hwmgr  *hwmgr,
 		if (*level & profile_mode_mask) {
 			hwmgr->saved_dpm_level = hwmgr->dpm_level;
 			hwmgr->en_umd_pstate = true;
-			amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
-						AMD_IP_BLOCK_TYPE_GFX,
-						AMD_CG_STATE_UNGATE);
 			amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_GFX,
 					AMD_PG_STATE_UNGATE);
+			amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
+						AMD_IP_BLOCK_TYPE_GFX,
+						AMD_CG_STATE_UNGATE);
 		}
 	} else {
 		/* exit umd pstate, restore level, enable gfx cg*/
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index e8b27fab6aa1..e77046931e4c 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -1476,7 +1476,7 @@ static int smu_disable_dpm(struct smu_context *smu)
 	bool use_baco = !smu->is_apu &&
 		((adev->in_gpu_reset &&
 		  (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
-		 (adev->in_runpm && amdgpu_asic_supports_baco(adev)));
+		 ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
 
 	ret = smu_get_smc_version(smu, NULL, &smu_version);
 	if (ret) {
@@ -1744,12 +1744,12 @@ static int smu_enable_umd_pstate(void *handle,
 		if (*level & profile_mode_mask) {
 			smu_dpm_ctx->saved_dpm_level = smu_dpm_ctx->dpm_level;
 			smu_dpm_ctx->enable_umd_pstate = true;
-			amdgpu_device_ip_set_clockgating_state(smu->adev,
-							       AMD_IP_BLOCK_TYPE_GFX,
-							       AMD_CG_STATE_UNGATE);
 			amdgpu_device_ip_set_powergating_state(smu->adev,
 							       AMD_IP_BLOCK_TYPE_GFX,
 							       AMD_PG_STATE_UNGATE);
+			amdgpu_device_ip_set_clockgating_state(smu->adev,
+							       AMD_IP_BLOCK_TYPE_GFX,
+							       AMD_CG_STATE_UNGATE);
 		}
 	} else {
 		/* exit umd pstate, restore level, enable gfx cg*/
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 2e5d835a9eaa..c125ca9ab9b3 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -485,8 +485,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv,
 	if (!ret)
 		goto err_llb;
 	else if (ret > 1) {
-		DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
-
+		DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
 	}
 
 	fbc->threshold = ret;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 0cc40e77bbd2..4f96c8788a2e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_vma *vma;
 
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 	if (!atomic_read(&obj->bind_count))
 		return;
 
@@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
-
-	assert_object_held(obj);
-
 	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(obj);
+	i915_gem_object_bump_inactive_ggtt(vma->obj);
 
 	i915_vma_unpin(vma);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 07cb83a0d017..ca0d4f4f3615 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -69,7 +69,13 @@ struct intel_context {
 #define CONTEXT_NOPREEMPT		7
 
 	u32 *lrc_reg_state;
-	u64 lrc_desc;
+	union {
+		struct {
+			u32 lrca;
+			u32 ccid;
+		};
+		u64 desc;
+	} lrc;
 	u32 tag; /* cookie passed to HW to track this context on submission */
 
 	/* Time on GPU as tracked by the hw. */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index b469de0dd9b6..a1aa0d3e8be1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -333,13 +333,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
 	return intel_engine_has_preemption(engine);
 }
 
-static inline bool
-intel_engine_has_timeslices(const struct intel_engine_cs *engine)
-{
-	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
-		return false;
-
-	return intel_engine_has_semaphores(engine);
-}
-
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 3aa8a652c16d..883a9b7fe88d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
 		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
 
+	if (HAS_EXECLISTS(dev_priv)) {
+		drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+		drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+	}
 	drm_printf(m, "\tRING_START: 0x%08x\n",
 		   ENGINE_READ(engine, RING_START));
 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 80cdde712842..0be674ae1cf6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -157,6 +157,20 @@ struct intel_engine_execlists {
 	struct i915_priolist default_priolist;
 
 	/**
+	 * @ccid: identifier for contexts submitted to this engine
+	 */
+	u32 ccid;
+
+	/**
+	 * @yield: CCID at the time of the last semaphore-wait interrupt.
+	 *
+	 * Instead of leaving a semaphore busy-spinning on an engine, we would
+	 * like to switch to another ready context, i.e. yielding the semaphore
+	 * timeslice.
+	 */
+	u32 yield;
+
+	/**
 	 * @error_interrupt: CS Master EIR
 	 *
 	 * The CS generates an interrupt when it detects an error. We capture
@@ -295,8 +309,7 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
-	unsigned int context_tag;
-#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
+	unsigned long context_tag;
 
 	struct rb_node uabi_node;
 
@@ -483,10 +496,11 @@ struct intel_engine_cs {
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
-#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
-#define I915_ENGINE_IS_VIRTUAL       BIT(5)
-#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
-#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
+#define I915_ENGINE_HAS_TIMESLICES   BIT(4)
+#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5)
+#define I915_ENGINE_IS_VIRTUAL       BIT(6)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8)
 	unsigned int flags;
 
 	/*
@@ -585,6 +599,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine)
 }
 
 static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return false;
+
+	return engine->flags & I915_ENGINE_HAS_TIMESLICES;
+}
+
+static inline bool
 intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
 {
 	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..0cc7dd54f4f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 		}
 	}
 
+	if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+		WRITE_ONCE(engine->execlists.yield,
+			   ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+		ENGINE_TRACE(engine, "semaphore yield: %08x\n",
+			     engine->execlists.yield);
+		if (del_timer(&engine->execlists.timer))
+			tasklet = true;
+	}
+
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
 		tasklet = true;
 
@@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	struct intel_uncore *uncore = gt->uncore;
 	const u32 dmask = irqs << 16 | irqs;
 	const u32 smask = irqs << 16;
@@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
 	const u32 irqs =
 		GT_CS_MASTER_ERROR_INTERRUPT |
 		GT_RENDER_USER_INTERRUPT |
-		GT_CONTEXT_SWITCH_INTERRUPT;
+		GT_CONTEXT_SWITCH_INTERRUPT |
+		GT_WAIT_SEMAPHORE_INTERRUPT;
 	const u32 gt_interrupts[] = {
 		irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
 		irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 683014e7bc51..2dfaddb8811e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev,
  * engine info, SW context ID and SW counter need to form a unique number
  * (Context ID) per lrc.
  */
-static u64
+static u32
 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 {
-	u64 desc;
+	u32 desc;
 
 	desc = INTEL_LEGACY_32B_CONTEXT;
 	if (i915_vm_is_4lvl(ce->vm))
@@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
 	if (IS_GEN(engine->i915, 8))
 		desc |= GEN8_CTX_L3LLC_COHERENT;
 
-	desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
-	/*
-	 * The following 32bits are copied into the OA reports (dword 2).
-	 * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
-	 * anything below.
-	 */
-	if (INTEL_GEN(engine->i915) >= 11) {
-		desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
-								/* bits 48-53 */
-
-		desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
-								/* bits 61-63 */
-	}
-
-	return desc;
+	return i915_ggtt_offset(ce->state) | desc;
 }
 
 static inline unsigned int dword_in_page(void *addr)
@@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq,
 	__execlists_update_reg_state(ce, engine, head);
 
 	/* We've switched away, so this should be a no-op, but intent matters */
-	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
 }
 
 static u32 intel_context_get_runtime(const struct intel_context *ce)
@@ -1251,18 +1237,23 @@ __execlists_schedule_in(struct i915_request *rq)
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 		execlists_check_context(ce, engine);
 
-	ce->lrc_desc &= ~GENMASK_ULL(47, 37);
 	if (ce->tag) {
 		/* Use a fixed tag for OA and friends */
-		ce->lrc_desc |= (u64)ce->tag << 32;
+		GEM_BUG_ON(ce->tag <= BITS_PER_LONG);
+		ce->lrc.ccid = ce->tag;
 	} else {
 		/* We don't need a strict matching tag, just different values */
-		ce->lrc_desc |=
-			(u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
-			GEN11_SW_CTX_ID_SHIFT;
-		BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+		unsigned int tag = ffs(engine->context_tag);
+
+		GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG);
+		clear_bit(tag - 1, &engine->context_tag);
+		ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32);
+
+		BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
 	}
 
+	ce->lrc.ccid |= engine->execlists.ccid;
+
 	__intel_gt_pm_get(engine->gt);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(engine);
@@ -1302,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
 
 static inline void
 __execlists_schedule_out(struct i915_request *rq,
-			 struct intel_engine_cs * const engine)
+			 struct intel_engine_cs * const engine,
+			 unsigned int ccid)
 {
 	struct intel_context * const ce = rq->context;
 
@@ -1320,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq,
 	    i915_request_completed(rq))
 		intel_engine_add_retire(engine, ce->timeline);
 
+	ccid >>= GEN11_SW_CTX_ID_SHIFT - 32;
+	ccid &= GEN12_MAX_CONTEXT_HW_ID;
+	if (ccid < BITS_PER_LONG) {
+		GEM_BUG_ON(ccid == 0);
+		GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
+		set_bit(ccid - 1, &engine->context_tag);
+	}
+
 	intel_context_update_runtime(ce);
 	intel_engine_context_out(engine);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
@@ -1345,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq)
 {
 	struct intel_context * const ce = rq->context;
 	struct intel_engine_cs *cur, *old;
+	u32 ccid;
 
 	trace_i915_request_out(rq);
 
+	ccid = rq->context->lrc.ccid;
 	old = READ_ONCE(ce->inflight);
 	do
 		cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
 	while (!try_cmpxchg(&ce->inflight, &old, cur));
 	if (!cur)
-		__execlists_schedule_out(rq, old);
+		__execlists_schedule_out(rq, old, ccid);
 
 	i915_request_put(rq);
 }
@@ -1361,7 +1363,7 @@ execlists_schedule_out(struct i915_request *rq)
 static u64 execlists_update_context(struct i915_request *rq)
 {
 	struct intel_context *ce = rq->context;
-	u64 desc = ce->lrc_desc;
+	u64 desc = ce->lrc.desc;
 	u32 tail, prev;
 
 	/*
@@ -1400,7 +1402,7 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 */
 	wmb();
 
-	ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+	ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE;
 	return desc;
 }
 
@@ -1719,6 +1721,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl)
 			struct i915_request *w =
 				container_of(p->waiter, typeof(*w), sched);
 
+			if (p->flags & I915_DEPENDENCY_WEAK)
+				continue;
+
 			/* Leave semaphores spinning on the other engines */
 			if (w->engine != rq->engine)
 				continue;
@@ -1754,7 +1759,8 @@ static void defer_active(struct intel_engine_cs *engine)
 }
 
 static bool
-need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+need_timeslice(const struct intel_engine_cs *engine,
+	       const struct i915_request *rq)
 {
 	int hint;
 
@@ -1768,6 +1774,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
 	return hint >= effective_prio(rq);
 }
 
+static bool
+timeslice_yield(const struct intel_engine_execlists *el,
+		const struct i915_request *rq)
+{
+	/*
+	 * Once bitten, forever smitten!
+	 *
+	 * If the active context ever busy-waited on a semaphore,
+	 * it will be treated as a hog until the end of its timeslice (i.e.
+	 * until it is scheduled out and replaced by a new submission,
+	 * possibly even its own lite-restore). The HW only sends an interrupt
+	 * on the first miss, and we do know if that semaphore has been
+	 * signaled, or even if it is now stuck on another semaphore. Play
+	 * safe, yield if it might be stuck -- it will be given a fresh
+	 * timeslice in the near future.
+	 */
+	return rq->context->lrc.ccid == READ_ONCE(el->yield);
+}
+
+static bool
+timeslice_expired(const struct intel_engine_execlists *el,
+		  const struct i915_request *rq)
+{
+	return timer_expired(&el->timer) || timeslice_yield(el, rq);
+}
+
 static int
 switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
 {
@@ -1783,8 +1815,7 @@ timeslice(const struct intel_engine_cs *engine)
 	return READ_ONCE(engine->props.timeslice_duration_ms);
 }
 
-static unsigned long
-active_timeslice(const struct intel_engine_cs *engine)
+static unsigned long active_timeslice(const struct intel_engine_cs *engine)
 {
 	const struct intel_engine_execlists *execlists = &engine->execlists;
 	const struct i915_request *rq = *execlists->active;
@@ -1946,13 +1977,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 			last = NULL;
 		} else if (need_timeslice(engine, last) &&
-			   timer_expired(&engine->execlists.timer)) {
+			   timeslice_expired(execlists, last)) {
 			ENGINE_TRACE(engine,
-				     "expired last=%llx:%lld, prio=%d, hint=%d\n",
+				     "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",
 				     last->fence.context,
 				     last->fence.seqno,
 				     last->sched.attr.priority,
-				     execlists->queue_priority_hint);
+				     execlists->queue_priority_hint,
+				     yesno(timeslice_yield(execlists, last)));
 
 			ring_set_paused(engine, 1);
 			defer_active(engine);
@@ -2213,6 +2245,7 @@ done:
 		}
 		clear_ports(port + 1, last_port - port);
 
+		WRITE_ONCE(execlists->yield, -1);
 		execlists_submit_ports(engine);
 		set_preempt_timeout(engine, *active);
 	} else {
@@ -3043,7 +3076,7 @@ __execlists_context_pin(struct intel_context *ce,
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
 
-	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
+	ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
 
@@ -3072,7 +3105,7 @@ static void execlists_context_reset(struct intel_context *ce)
 				 ce, ce->engine, ce->ring, true);
 	__execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
 
-	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE;
 }
 
 static const struct intel_context_ops execlists_context_ops = {
@@ -3541,7 +3574,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
 
 	enable_error_interrupt(engine);
 
-	engine->context_tag = 0;
+	engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
 }
 
 static bool unexpected_starting_state(struct intel_engine_cs *engine)
@@ -3753,7 +3786,7 @@ out_replay:
 		     head, ce->ring->tail);
 	__execlists_reset_reg_state(ce, engine);
 	__execlists_update_reg_state(ce, engine, head);
-	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+	ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
 
 unwind:
 	/* Push back any incomplete requests for replay after the reset. */
@@ -4369,8 +4402,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (!intel_vgpu_active(engine->i915)) {
 		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
 			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+			if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+				engine->flags |= I915_ENGINE_HAS_TIMESLICES;
+		}
 	}
 
 	if (INTEL_GEN(engine->i915) >= 12)
@@ -4449,6 +4485,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 	engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
+	engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
 }
 
 static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4516,6 +4553,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 	else
 		execlists->csb_size = GEN11_CSB_ENTRIES;
 
+	if (INTEL_GEN(engine->i915) >= 11) {
+		execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+		execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32);
+	}
+
 	reset_csb_pointers(engine);
 
 	/* Finally, take ownership and responsibility for cleanup! */
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 6f06ba750a0a..f95ae15ce865 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -929,7 +929,7 @@ create_rewinder(struct intel_context *ce,
 			goto err;
 	}
 
-	cs = intel_ring_begin(rq, 10);
+	cs = intel_ring_begin(rq, 14);
 	if (IS_ERR(cs)) {
 		err = PTR_ERR(cs);
 		goto err;
@@ -941,8 +941,8 @@ create_rewinder(struct intel_context *ce,
 	*cs++ = MI_SEMAPHORE_WAIT |
 		MI_SEMAPHORE_GLOBAL_GTT |
 		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_NEQ_SDD;
-	*cs++ = 0;
+		MI_SEMAPHORE_SAD_GTE_SDD;
+	*cs++ = idx;
 	*cs++ = offset;
 	*cs++ = 0;
 
@@ -951,6 +951,11 @@ create_rewinder(struct intel_context *ce,
 	*cs++ = offset + idx * sizeof(u32);
 	*cs++ = 0;
 
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = offset;
+	*cs++ = 0;
+	*cs++ = idx + 1;
+
 	intel_ring_advance(rq, cs);
 
 	rq->sched.attr.priority = I915_PRIORITY_MASK;
@@ -984,7 +989,7 @@ static int live_timeslice_rewind(void *arg)
 
 	for_each_engine(engine, gt, id) {
 		enum { A1, A2, B1 };
-		enum { X = 1, Y, Z };
+		enum { X = 1, Z, Y };
 		struct i915_request *rq[3] = {};
 		struct intel_context *ce;
 		unsigned long heartbeat;
@@ -1017,13 +1022,13 @@ static int live_timeslice_rewind(void *arg)
 			goto err;
 		}
 
-		rq[0] = create_rewinder(ce, NULL, slot, 1);
+		rq[0] = create_rewinder(ce, NULL, slot, X);
 		if (IS_ERR(rq[0])) {
 			intel_context_put(ce);
 			goto err;
 		}
 
-		rq[1] = create_rewinder(ce, NULL, slot, 2);
+		rq[1] = create_rewinder(ce, NULL, slot, Y);
 		intel_context_put(ce);
 		if (IS_ERR(rq[1]))
 			goto err;
@@ -1041,7 +1046,7 @@ static int live_timeslice_rewind(void *arg)
 			goto err;
 		}
 
-		rq[2] = create_rewinder(ce, rq[0], slot, 3);
+		rq[2] = create_rewinder(ce, rq[0], slot, Z);
 		intel_context_put(ce);
 		if (IS_ERR(rq[2]))
 			goto err;
@@ -1055,15 +1060,12 @@ static int live_timeslice_rewind(void *arg)
 		GEM_BUG_ON(!timer_pending(&engine->execlists.timer));
 
 		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
-		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
-		GEM_BUG_ON(!i915_request_is_active(rq[A2]));
-		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
-
-		/* Wait for the timeslice to kick in */
-		del_timer(&engine->execlists.timer);
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-		intel_engine_flush_submission(engine);
-
+		if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */
+			/* Wait for the timeslice to kick in */
+			del_timer(&engine->execlists.timer);
+			tasklet_hi_schedule(&engine->execlists.tasklet);
+			intel_engine_flush_submission(engine);
+		}
 		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
 		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
 		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index fe7778c28d2d..aa6d56e25a10 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc,
 static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
-	u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
+	u32 ctx_desc = rq->context->lrc.ccid;
 	u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
 
 	guc_wq_item_append(guc, engine->guc_id, ctx_desc,
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index a83df2f84eb9..a1696e9ce4b6 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -208,14 +208,41 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 				SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
 				SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
 				SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
-		vgpu_vreg_t(vgpu, LCPLL1_CTL) |=
-				LCPLL_PLL_ENABLE |
-				LCPLL_PLL_LOCK;
-		vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
-
+		/*
+		 * Only 1 PIPE enabled in current vGPU display and PIPE_A is
+		 *  tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
+		 *   TRANSCODER_A can be enabled. PORT_x depends on the input of
+		 *   setup_virtual_dp_monitor, we can bind DPLL0 to any PORT_x
+		 *   so we fixed to DPLL0 here.
+		 * Setup DPLL0: DP link clk 1620 MHz, non SSC, DP Mode
+		 */
+		vgpu_vreg_t(vgpu, DPLL_CTRL1) =
+			DPLL_CTRL1_OVERRIDE(DPLL_ID_SKL_DPLL0);
+		vgpu_vreg_t(vgpu, DPLL_CTRL1) |=
+			DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, DPLL_ID_SKL_DPLL0);
+		vgpu_vreg_t(vgpu, LCPLL1_CTL) =
+			LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK;
+		vgpu_vreg_t(vgpu, DPLL_STATUS) = DPLL_LOCK(DPLL_ID_SKL_DPLL0);
+		/*
+		 * Golden M/N are calculated based on:
+		 *   24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
+		 *   DP link clk 1620 MHz and non-constant_n.
+		 * TODO: calculate DP link symbol clk and stream clk m/n.
+		 */
+		vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = 63 << TU_SIZE_SHIFT;
+		vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
+		vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
+		vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
+		vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+			~DPLL_CTRL2_DDI_CLK_OFF(PORT_B);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_B);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B);
 		vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
 		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
 			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -236,6 +263,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+			~DPLL_CTRL2_DDI_CLK_OFF(PORT_C);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_C);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C);
 		vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
 		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
 			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
@@ -256,6 +289,12 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+			~DPLL_CTRL2_DDI_CLK_OFF(PORT_D);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_D);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D);
 		vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
 		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
 			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index cb11c3184085..e92ed96c9b23 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -290,7 +290,7 @@ static void
 shadow_context_descriptor_update(struct intel_context *ce,
 				 struct intel_vgpu_workload *workload)
 {
-	u64 desc = ce->lrc_desc;
+	u64 desc = ce->lrc.desc;
 
 	/*
 	 * Update bits 0-11 of the context descriptor which includes flags
@@ -300,7 +300,7 @@ shadow_context_descriptor_update(struct intel_context *ce,
 	desc |= (u64)workload->ctx_desc.addressing_mode <<
 		GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
-	ce->lrc_desc = desc;
+	ce->lrc.desc = desc;
 }
 
 static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
@@ -379,7 +379,11 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 		for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
 			struct i915_page_directory * const pd =
 				i915_pd_entry(ppgtt->pd, i);
-
+			/* skip now as current i915 ppgtt alloc won't allocate
+			   top level pdp for non 4-level table, won't impact
+			   shadow ppgtt. */
+			if (!pd)
+				break;
 			px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
 		}
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 4518b9b35c3d..02ad1acd117c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -128,6 +128,13 @@ search_again:
 	active = NULL;
 	INIT_LIST_HEAD(&eviction_list);
 	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+		if (vma == active) { /* now seen this vma twice */
+			if (flags & PIN_NONBLOCK)
+				break;
+
+			active = ERR_PTR(-EAGAIN);
+		}
+
 		/*
 		 * We keep this list in a rough least-recently scanned order
 		 * of active elements (inactive elements are cheap to reap).
@@ -143,21 +150,12 @@ search_again:
 		 * To notice when we complete one full cycle, we record the
 		 * first active element seen, before moving it to the tail.
 		 */
-		if (i915_vma_is_active(vma)) {
-			if (vma == active) {
-				if (flags & PIN_NONBLOCK)
-					break;
-
-				active = ERR_PTR(-EAGAIN);
-			}
-
-			if (active != ERR_PTR(-EAGAIN)) {
-				if (!active)
-					active = vma;
+		if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) {
+			if (!active)
+				active = vma;
 
-				list_move_tail(&vma->vm_link, &vm->bound_list);
-				continue;
-			}
+			list_move_tail(&vma->vm_link, &vm->bound_list);
+			continue;
 		}
 
 		if (mark_free(&scan, vma, flags, &eviction_list))
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2a4cd0ba5464..5c8e51d2ba5b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1207,8 +1207,6 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
 static void record_request(const struct i915_request *request,
 			   struct i915_request_coredump *erq)
 {
-	const struct i915_gem_context *ctx;
-
 	erq->flags = request->fence.flags;
 	erq->context = request->fence.context;
 	erq->seqno = request->fence.seqno;
@@ -1219,9 +1217,13 @@ static void record_request(const struct i915_request *request,
 
 	erq->pid = 0;
 	rcu_read_lock();
-	ctx = rcu_dereference(request->context->gem_context);
-	if (ctx)
-		erq->pid = pid_nr(ctx->pid);
+	if (!intel_context_is_closed(request->context)) {
+		const struct i915_gem_context *ctx;
+
+		ctx = rcu_dereference(request->context->gem_context);
+		if (ctx)
+			erq->pid = pid_nr(ctx->pid);
+	}
 	rcu_read_unlock();
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index d91557d842dc..8a2b83807ffc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3361,7 +3361,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 	u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) |
 		GEN8_PIPE_CDCLK_CRC_DONE;
 	u32 de_pipe_enables;
-	u32 de_port_masked = GEN8_AUX_CHANNEL_A;
+	u32 de_port_masked = gen8_de_port_aux_mask(dev_priv);
 	u32 de_port_enables;
 	u32 de_misc_masked = GEN8_DE_EDP_PSR;
 	enum pipe pipe;
@@ -3369,18 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 	if (INTEL_GEN(dev_priv) <= 10)
 		de_misc_masked |= GEN8_DE_MISC_GSE;
 
-	if (INTEL_GEN(dev_priv) >= 9) {
-		de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
-				  GEN9_AUX_CHANNEL_D;
-		if (IS_GEN9_LP(dev_priv))
-			de_port_masked |= BXT_DE_PORT_GMBUS;
-	}
-
-	if (INTEL_GEN(dev_priv) >= 11)
-		de_port_masked |= ICL_AUX_CHANNEL_E;
-
-	if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11)
-		de_port_masked |= CNL_AUX_CHANNEL_F;
+	if (IS_GEN9_LP(dev_priv))
+		de_port_masked |= BXT_DE_PORT_GMBUS;
 
 	de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK |
 					   GEN8_PIPE_FIFO_UNDERRUN;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 66a46e41d5ef..cf2c01f17da8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1310,8 +1310,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 			 * dropped by GuC. They won't be part of the context
			 * ID in the OA reports, so squash those lower bits.
 			 */
-			stream->specific_ctx_id =
-				lower_32_bits(ce->lrc_desc) >> 12;
+			stream->specific_ctx_id = ce->lrc.lrca >> 12;
 
 			/*
 			 * GuC uses the top bit to signal proxy submission, so
@@ -1328,11 +1327,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
 		/*
 		 * Pick an unused context id
-		 * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
+		 * 0 - BITS_PER_LONG are used by other contexts
 		 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
 		 */
 		stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
-		BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e0c6021fdaf9..6e12000c4b6b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
 #define GT_BSD_USER_INTERRUPT			(1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11) /* bdw+ */
 #define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c0df71d7d0ff..e2b78db685ea 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1017,11 +1017,15 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 	GEM_BUG_ON(to == from);
 	GEM_BUG_ON(to->timeline == from->timeline);
 
-	if (i915_request_completed(from))
+	if (i915_request_completed(from)) {
+		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
 		return 0;
+	}
 
 	if (to->engine->schedule) {
-		ret = i915_sched_node_add_dependency(&to->sched, &from->sched);
+		ret = i915_sched_node_add_dependency(&to->sched,
+						     &from->sched,
+						     I915_DEPENDENCY_EXTERNAL);
 		if (ret < 0)
 			return ret;
 	}
@@ -1183,7 +1187,9 @@ __i915_request_await_execution(struct i915_request *to,
 
 	/* Couple the dependency tree for PI on this exposed to->fence */
 	if (to->engine->schedule) {
-		err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+		err = i915_sched_node_add_dependency(&to->sched,
+						     &from->sched,
+						     I915_DEPENDENCY_WEAK);
 		if (err < 0)
 			return err;
 	}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 68b06a7ba667..f0a9e8958ca0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -456,7 +456,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 }
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-				   struct i915_sched_node *signal)
+				   struct i915_sched_node *signal,
+				   unsigned long flags)
 {
 	struct i915_dependency *dep;
 
@@ -465,8 +466,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
 		return -ENOMEM;
 
 	if (!__i915_sched_node_add_dependency(node, signal, dep,
-					      I915_DEPENDENCY_EXTERNAL |
-					      I915_DEPENDENCY_ALLOC))
+					      flags | I915_DEPENDENCY_ALLOC))
 		i915_dependency_free(dep);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index d1dc4efef77b..6f0bf00fc569 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 				      unsigned long flags);
 
 int i915_sched_node_add_dependency(struct i915_sched_node *node,
-				   struct i915_sched_node *signal);
+				   struct i915_sched_node *signal,
+				   unsigned long flags);
 
 void i915_sched_node_fini(struct i915_sched_node *node);
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index d18e70550054..7186875088a0 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -78,6 +78,7 @@ struct i915_dependency {
 	unsigned long flags;
 #define I915_DEPENDENCY_ALLOC		BIT(0)
 #define I915_DEPENDENCY_EXTERNAL	BIT(1)
+#define I915_DEPENDENCY_WEAK		BIT(2)
 };
 
 #endif /* _I915_SCHEDULER_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 82e3bc280622..2cd7a7e87c0a 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1228,18 +1228,6 @@ int __i915_vma_unbind(struct i915_vma *vma)
 
 	lockdep_assert_held(&vma->vm->mutex);
 
-	/*
-	 * First wait upon any activity as retiring the request may
-	 * have side-effects such as unpinning or even unbinding this vma.
-	 *
-	 * XXX Actually waiting under the vm->mutex is a hinderance and
-	 * should be pipelined wherever possible. In cases where that is
-	 * unavoidable, we should lift the wait to before the mutex.
-	 */
-	ret = i915_vma_sync(vma);
-	if (ret)
-		return ret;
-
 	if (i915_vma_is_pinned(vma)) {
 		vma_print_allocator(vma, "is pinned");
 		return -EAGAIN;
@@ -1313,15 +1301,20 @@ int i915_vma_unbind(struct i915_vma *vma)
 	if (!drm_mm_node_allocated(&vma->node))
 		return 0;
 
-	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
-		/* XXX not always required: nop_clear_range */
-		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
-
 	/* Optimistic wait before taking the mutex */
 	err = i915_vma_sync(vma);
 	if (err)
 		goto out_rpm;
 
+	if (i915_vma_is_pinned(vma)) {
+		vma_print_allocator(vma, "is pinned");
+		return -EAGAIN;
+	}
+
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+		/* XXX not always required: nop_clear_range */
+		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
 	err = mutex_lock_interruptible(&vm->mutex);
 	if (err)
 		goto out_rpm;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 8375054ba27d..a52986a9e7a6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4992,7 +4992,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state,
 	 * WaIncreaseLatencyIPCEnabled: kbl,cfl
 	 * Display WA #1141: kbl,cfl
 	 */
-	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) ||
+	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) &&
 	    dev_priv->ipc_enabled)
 		latency += 4;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 58b5f40a07dd..af89c7fc8f59 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -173,7 +173,7 @@ static int igt_vma_create(void *arg)
 		}
 
 		nc = 0;
-		for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) {
+		for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) {
 			for (; nc < num_ctx; nc++) {
 				ctx = mock_context(i915, "mock");
 				if (!ctx)
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index b5f5eb7b4bb9..8c2e1b47e81a 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -412,9 +412,7 @@ static int __maybe_unused meson_drv_pm_resume(struct device *dev)
 	if (priv->afbcd.ops)
 		priv->afbcd.ops->init(priv);
 
-	drm_mode_config_helper_resume(priv->drm);
-
-	return 0;
+	return drm_mode_config_helper_resume(priv->drm);
 }
 
 static int compare_of(struct device *dev, void *data)
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bd268028fb3d..583cd6e0ae27 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1039,6 +1039,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
 
 static bool host1x_drm_wants_iommu(struct host1x_device *dev)
 {
+	struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
 	struct iommu_domain *domain;
 
 	/*
@@ -1076,7 +1077,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
 	 * sufficient and whether or not the host1x is attached to an IOMMU
 	 * doesn't matter.
 	 */
-	if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32))
+	if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32))
 		return true;
 
 	return domain != NULL;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 388bcc2889aa..d24344e91922 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -192,17 +192,55 @@ static void host1x_setup_sid_table(struct host1x *host)
 	}
 }
 
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+	/*
+	 * If we support addressing a maximum of 32 bits of physical memory
+	 * and if the host1x firewall is enabled, there's no need to enable
+	 * IOMMU support. This can happen for example on Tegra20, Tegra30
+	 * and Tegra114.
+	 *
+	 * Tegra124 and later can address up to 34 bits of physical memory and
+	 * many platforms come equipped with more than 2 GiB of system memory,
+	 * which requires crossing the 4 GiB boundary. But there's a catch: on
+	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+	 * only address up to 32 bits of memory in GATHER opcodes, which means
+	 * that command buffers need to either be in the first 2 GiB of system
+	 * memory (which could quickly lead to memory exhaustion), or command
+	 * buffers need to be treated differently from other buffers (which is
+	 * not possible with the current ABI).
+	 *
+	 * A third option is to use the IOMMU in these cases to make sure all
+	 * buffers will be mapped into a 32-bit IOVA space that host1x can
+	 * address. This allows all of the system memory to be used and works
+	 * within the limitations of the host1x on these SoCs.
+	 *
+	 * In summary, default to enable IOMMU on Tegra124 and later. For any
+	 * of the earlier SoCs, only use the IOMMU for additional safety when
+	 * the host1x firewall is disabled.
+	 */
+	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+			return false;
+	}
+
+	return true;
+}
+
 static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
 	int err;
 
 	/*
-	 * If the host1x firewall is enabled, there's no need to enable IOMMU
-	 * support. Similarly, if host1x is already attached to an IOMMU (via
-	 * the DMA API), don't try to attach again.
+	 * We may not always want to enable IOMMU support (for example if the
+	 * host1x firewall is already enabled and we don't support addressing
+	 * more than 32 bits of physical memory), so check for that first.
+	 *
+	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
+	 * API), don't try to attach again.
 	 */
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
+	if (!host1x_wants_iommu(host) || domain)
 		return domain;
 
 	host->group = iommu_group_get(host->dev);
@@ -502,6 +540,19 @@ static void __exit tegra_host1x_exit(void)
 }
 module_exit(tegra_host1x_exit);
 
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+	return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
 MODULE_AUTHOR("Thierry Reding <[email protected]>");
 MODULE_AUTHOR("Terje Bergstrom <[email protected]>");
 MODULE_DESCRIPTION("Host1x driver for Tegra products");