Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
93 files changed, 4430 insertions, 1759 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 06192698bd96..5b393622f592 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -136,6 +136,7 @@  #define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE        0x02  #define GENERIC_OBJECT_ID_MXM_OPM                 0x03  #define GENERIC_OBJECT_ID_STEREO_PIN              0x04        //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin +#define GENERIC_OBJECT_ID_BRACKET_LAYOUT          0x05  /****************************************************/  /* Graphics Object ENUM ID Definition               */ @@ -714,6 +715,13 @@                                                   GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\                                                   GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT) +#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1    (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\ +                                                 GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ +                                                 GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT) + +#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2    (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\ +                                                 GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\ +                                                 GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)  /****************************************************/  /* Object Cap definition - Shared with BIOS         */  /****************************************************/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7dcbac8af9a7..447c4c7a36d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -73,6 +73,8 @@  #include "amdgpu_virt.h"  #include "amdgpu_gart.h"  #include "amdgpu_debugfs.h" +#include "amdgpu_job.h" +#include "amdgpu_bo_list.h"  /*   * Modules parameters. 
@@ -105,11 +107,8 @@ extern int amdgpu_vm_fault_stop;  extern int amdgpu_vm_debug;  extern int amdgpu_vm_update_mode;  extern int amdgpu_dc; -extern int amdgpu_dc_log;  extern int amdgpu_sched_jobs;  extern int amdgpu_sched_hw_submission; -extern int amdgpu_no_evict; -extern int amdgpu_direct_gma_size;  extern uint amdgpu_pcie_gen_cap;  extern uint amdgpu_pcie_lane_cap;  extern uint amdgpu_cg_mask; @@ -600,17 +599,6 @@ struct amdgpu_ib {  extern const struct drm_sched_backend_ops amdgpu_sched_ops; -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, -		     struct amdgpu_job **job, struct amdgpu_vm *vm); -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, -			     struct amdgpu_job **job); - -void amdgpu_job_free_resources(struct amdgpu_job *job); -void amdgpu_job_free(struct amdgpu_job *job); -int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, -		      struct drm_sched_entity *entity, void *owner, -		      struct dma_fence **f); -  /*   * Queue manager   */ @@ -684,8 +672,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,  int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);  void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);  void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); @@ -703,37 +691,6 @@ struct amdgpu_fpriv {  };  /* - * residency list - */ -struct amdgpu_bo_list_entry { -	struct amdgpu_bo		*robj; -	struct ttm_validate_buffer	tv; -	struct amdgpu_bo_va		*bo_va; -	uint32_t			priority; -	struct page			**user_pages; -	int				user_invalidated; -}; - -struct amdgpu_bo_list { -	struct mutex lock; -	struct rcu_head rhead; -	struct kref refcount; -	struct amdgpu_bo *gds_obj; -	struct amdgpu_bo *gws_obj; -	struct amdgpu_bo *oa_obj; -	unsigned first_userptr; -	unsigned num_entries; -	struct amdgpu_bo_list_entry *array; -}; - -struct amdgpu_bo_list * -amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); -void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, -			     struct list_head *validated); -void amdgpu_bo_list_put(struct amdgpu_bo_list *list); -void amdgpu_bo_list_free(struct amdgpu_bo_list *list); - -/*   * GFX stuff   */  #include "clearstate_defs.h" @@ -931,6 +888,11 @@ struct amdgpu_ngg {  	bool			init;  }; +struct sq_work { +	struct work_struct	work; +	unsigned ih_data; +}; +  struct amdgpu_gfx {  	struct mutex			gpu_clock_mutex;  	struct amdgpu_gfx_config	config; @@ -969,6 +931,10 @@ struct amdgpu_gfx {  	struct amdgpu_irq_src		eop_irq;  	struct amdgpu_irq_src		priv_reg_irq;  	struct amdgpu_irq_src		priv_inst_irq; +	struct amdgpu_irq_src		cp_ecc_error_irq; +	struct amdgpu_irq_src		sq_irq; +	struct sq_work			sq_work; +  	/* gfx status */  	uint32_t			gfx_current_status;  	/* ce ram size*/ @@ -1020,6 +986,7 @@ struct amdgpu_cs_parser {  	/* scheduler job object */  	struct amdgpu_job	*job; +	struct amdgpu_ring	*ring;  	/* buffer objects */  	struct ww_acquire_ctx		ticket; @@ -1041,40 +1008,6 @@ struct amdgpu_cs_parser {  	struct drm_syncobj **post_dep_syncobjs;  }; -#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */ -#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */ -#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occured */ 
- -struct amdgpu_job { -	struct drm_sched_job    base; -	struct amdgpu_device	*adev; -	struct amdgpu_vm	*vm; -	struct amdgpu_ring	*ring; -	struct amdgpu_sync	sync; -	struct amdgpu_sync	sched_sync; -	struct amdgpu_ib	*ibs; -	struct dma_fence	*fence; /* the hw fence */ -	uint32_t		preamble_status; -	uint32_t		num_ibs; -	void			*owner; -	uint64_t		fence_ctx; /* the fence_context this job uses */ -	bool                    vm_needs_flush; -	uint64_t		vm_pd_addr; -	unsigned		vmid; -	unsigned		pasid; -	uint32_t		gds_base, gds_size; -	uint32_t		gws_base, gws_size; -	uint32_t		oa_base, oa_size; -	uint32_t		vram_lost_counter; - -	/* user fence handling */ -	uint64_t		uf_addr; -	uint64_t		uf_sequence; - -}; -#define to_amdgpu_job(sched_job)		\ -		container_of((sched_job), struct amdgpu_job, base) -  static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,  				      uint32_t ib_idx, int idx)  { @@ -1389,6 +1322,7 @@ enum amd_hw_ip_block_type {  	PWR_HWIP,  	NBIF_HWIP,  	THM_HWIP, +	CLK_HWIP,  	MAX_HWIP  }; @@ -1579,9 +1513,9 @@ struct amdgpu_device {  	DECLARE_HASHTABLE(mn_hash, 7);  	/* tracking pinned memory */ -	u64 vram_pin_size; -	u64 invisible_pin_size; -	u64 gart_pin_size; +	atomic64_t vram_pin_size; +	atomic64_t visible_pin_size; +	atomic64_t gart_pin_size;  	/* amdkfd interface */  	struct kfd_dev          *kfd; @@ -1776,6 +1710,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)  #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))  #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))  #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) +#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))  #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))  #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))  #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) @@ -1829,8 +1764,6 @@ void amdgpu_display_update_priority(struct amdgpu_device *adev);  void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,  				  u64 num_vis_bytes); -void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); -bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);  void amdgpu_device_vram_location(struct amdgpu_device *adev,  				 struct amdgpu_gmc *mc, u64 base);  void amdgpu_device_gart_location(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 0d8c3fc6eace..353993218f21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -364,7 +364,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,  			       struct acpi_bus_event *event)  {  	struct amdgpu_atif *atif = adev->atif; -	struct atif_sbios_requests req;  	int count;  	DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n", @@ -379,42 +378,48 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,  		/* Not our event */  		return NOTIFY_DONE; -	/* Check pending SBIOS requests */ -	count = amdgpu_atif_get_sbios_requests(atif, &req); +	if (atif->functions.sbios_requests) { +		struct atif_sbios_requests req; -	if (count <= 0) -		return NOTIFY_DONE; +		/* Check pending SBIOS requests */ +		count = amdgpu_atif_get_sbios_requests(atif, &req); + +		if (count <= 0) +			return NOTIFY_DONE; -	
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count); +		DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count); -	if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) { -		struct amdgpu_encoder *enc = atif->encoder_for_bl; +		/* todo: add DC handling */ +		if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) && +		    !amdgpu_device_has_dc_support(adev)) { +			struct amdgpu_encoder *enc = atif->encoder_for_bl; -		if (enc) { -			struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; +			if (enc) { +				struct amdgpu_encoder_atom_dig *dig = enc->enc_priv; -			DRM_DEBUG_DRIVER("Changing brightness to %d\n", -					req.backlight_level); +				DRM_DEBUG_DRIVER("Changing brightness to %d\n", +						 req.backlight_level); -			amdgpu_display_backlight_set_level(adev, enc, req.backlight_level); +				amdgpu_display_backlight_set_level(adev, enc, req.backlight_level);  #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) -			backlight_force_update(dig->bl_dev, -					       BACKLIGHT_UPDATE_HOTKEY); +				backlight_force_update(dig->bl_dev, +						       BACKLIGHT_UPDATE_HOTKEY);  #endif +			}  		} -	} -	if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { -		if ((adev->flags & AMD_IS_PX) && -		    amdgpu_atpx_dgpu_req_power_for_displays()) { -			pm_runtime_get_sync(adev->ddev->dev); -			/* Just fire off a uevent and let userspace tell us what to do */ -			drm_helper_hpd_irq_event(adev->ddev); -			pm_runtime_mark_last_busy(adev->ddev->dev); -			pm_runtime_put_autosuspend(adev->ddev->dev); +		if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { +			if ((adev->flags & AMD_IS_PX) && +			    amdgpu_atpx_dgpu_req_power_for_displays()) { +				pm_runtime_get_sync(adev->ddev->dev); +				/* Just fire off a uevent and let userspace tell us what to do */ +				drm_helper_hpd_irq_event(adev->ddev); +				pm_runtime_mark_last_busy(adev->ddev->dev); +				pm_runtime_put_autosuspend(adev->ddev->dev); +			}  		} +		/* TODO: check other events */  	} -	/* TODO: check other events */  	/* We've handled the event, stop the notifier chain. 
The ACPI interface  	 * overloads ACPI_VIDEO_NOTIFY_PROBE, we don't want to send that to diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 305143fcc1ce..f8bbbb3a9504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -243,6 +243,33 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)  	return r;  } +int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) +{ +	int r = 0; + +	if (adev->kfd) +		r = kgd2kfd->pre_reset(adev->kfd); + +	return r; +} + +int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) +{ +	int r = 0; + +	if (adev->kfd) +		r = kgd2kfd->post_reset(adev->kfd); + +	return r; +} + +void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + +	amdgpu_device_gpu_recover(adev, NULL, false); +} +  int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,  			void **mem_obj, uint64_t *gpu_addr,  			void **cpu_ptr) @@ -251,7 +278,6 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,  	struct amdgpu_bo *bo = NULL;  	struct amdgpu_bo_param bp;  	int r; -	uint64_t gpu_addr_tmp = 0;  	void *cpu_ptr_tmp = NULL;  	memset(&bp, 0, sizeof(bp)); @@ -275,13 +301,18 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,  		goto allocate_mem_reserve_bo_failed;  	} -	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, -				&gpu_addr_tmp); +	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);  	if (r) {  		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);  		goto allocate_mem_pin_bo_failed;  	} +	r = amdgpu_ttm_alloc_gart(&bo->tbo); +	if (r) { +		dev_err(adev->dev, "%p bind failed\n", bo); +		goto allocate_mem_kmap_bo_failed; +	} +  	r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);  	if (r) {  		dev_err(adev->dev, @@ -290,7 +321,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,  	}  	*mem_obj = bo; -	*gpu_addr = gpu_addr_tmp; +	*gpu_addr = amdgpu_bo_gpu_offset(bo);  	*cpu_ptr = cpu_ptr_tmp;  	amdgpu_bo_unreserve(bo); @@ -457,6 +488,14 @@ err:  	return ret;  } +void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + +	amdgpu_dpm_switch_power_profile(adev, +					PP_SMC_POWER_PROFILE_COMPUTE, !idle); +} +  bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)  {  	if (adev->kfd) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a8418a3f4e9d..2f379c183ed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);  int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,  				uint32_t vmid, uint64_t gpu_addr,  				uint32_t *ib_cmd, uint32_t ib_len); +void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); @@ -126,6 +127,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);  bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); +int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); + +int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); + +void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); +  /* Shared API */  int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,  			void **mem_obj, uint64_t *gpu_addr, @@ -183,6 +190,9 @@ int 
amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,  					    struct dma_fence **ef); +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, +					      struct kfd_vm_fault_info *info); +  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);  void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 2c14025e5e76..574c1181ae9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -173,7 +173,5 @@ static const struct dma_fence_ops amdkfd_fence_ops = {  	.get_driver_name = amdkfd_fence_get_driver_name,  	.get_timeline_name = amdkfd_fence_get_timeline_name,  	.enable_signaling = amdkfd_fence_enable_signaling, -	.signaled = NULL, -	.wait = dma_fence_default_wait,  	.release = amdkfd_fence_release,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea79908dac4c..ea3f698aef5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,  		uint32_t page_table_base);  static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);  static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);  /* Because of REG_GET_FIELD() being used, we put this function in the   * asic specific file. @@ -216,6 +217,10 @@ static const struct kfd2kgd_calls kfd2kgd = {  	.invalidate_tlbs = invalidate_tlbs,  	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,  	.submit_ib = amdgpu_amdkfd_submit_ib, +	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, +	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, +	.gpu_recover = amdgpu_amdkfd_gpu_reset, +	.set_compute_idle = amdgpu_amdkfd_set_compute_idle  };  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -571,6 +576,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,  	unsigned long flags, end_jiffies;  	int retry; +	if (adev->in_gpu_reset) +		return -EIO; +  	acquire_queue(kgd, pipe_id, queue_id);  	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); @@ -882,6 +890,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  	int vmid;  	unsigned int tmp; +	if (adev->in_gpu_reset) +		return -EIO; +  	for (vmid = 0; vmid < 16; vmid++) {  		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))  			continue; @@ -911,3 +922,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)  	RREG32(mmVM_INVALIDATE_RESPONSE);  	return 0;  } + + /** +  * read_vmid_from_vmfault_reg - read vmid from register +  * +  * adev: amdgpu_device pointer +  * @vmid: vmid pointer +  * read vmid from register (CIK). 
+  */ +static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) +{ +	struct amdgpu_device *adev = get_amdgpu_device(kgd); + +	uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); + +	return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 19dd665e7307..f6e53e9352bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -176,6 +176,9 @@ static const struct kfd2kgd_calls kfd2kgd = {  	.invalidate_tlbs = invalidate_tlbs,  	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,  	.submit_ib = amdgpu_amdkfd_submit_ib, +	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, +	.gpu_recover = amdgpu_amdkfd_gpu_reset, +	.set_compute_idle = amdgpu_amdkfd_set_compute_idle  };  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,  	int retry;  	struct vi_mqd *m = get_mqd(mqd); +	if (adev->in_gpu_reset) +		return -EIO; +  	acquire_queue(kgd, pipe_id, queue_id);  	if (m->cp_hqd_vmid == 0) @@ -844,6 +850,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  	int vmid;  	unsigned int tmp; +	if (adev->in_gpu_reset) +		return -EIO; +  	for (vmid = 0; vmid < 16; vmid++) {  		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))  			continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1db60aa5b7f0..8efedfcb9dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -213,6 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = {  	.invalidate_tlbs = invalidate_tlbs,  	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,  	.submit_ib = amdgpu_amdkfd_submit_ib, +	.gpu_recover = amdgpu_amdkfd_gpu_reset, +	.set_compute_idle = amdgpu_amdkfd_set_compute_idle  };  struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) @@ -679,6 +681,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,  	uint32_t temp;  	struct v9_mqd *m = get_mqd(mqd); +	if (adev->in_gpu_reset) +		return -EIO; +  	acquire_queue(kgd, pipe_id, queue_id);  	if (m->cp_hqd_vmid == 0) @@ -866,6 +871,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)  	int vmid;  	struct amdgpu_ring *ring = &adev->gfx.kiq.ring; +	if (adev->in_gpu_reset) +		return -EIO; +  	if (ring->ready)  		return invalidate_tlbs_with_kiq(adev, pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ff8fd75f7ca5..8a707d8bbb1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -334,7 +334,7 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,  		 "Called with userptr BO"))  		return -EINVAL; -	amdgpu_ttm_placement_from_domain(bo, domain); +	amdgpu_bo_placement_from_domain(bo, domain);  	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  	if (ret) @@ -622,7 +622,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,  		pr_err("%s: Failed to reserve BO\n", __func__);  		goto release_out;  	} -	amdgpu_ttm_placement_from_domain(bo, mem->domain); +	amdgpu_bo_placement_from_domain(bo, mem->domain);  	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  	if (ret)  		pr_err("%s: failed to validate BO\n", __func__); @@ 
-1587,7 +1587,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,  		goto bo_reserve_failed;  	} -	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); +	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);  	if (ret) {  		pr_err("Failed to pin bo. ret %d\n", ret);  		goto pin_failed; @@ -1621,6 +1621,20 @@ bo_reserve_failed:  	return ret;  } +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, +					      struct kfd_vm_fault_info *mem) +{ +	struct amdgpu_device *adev; + +	adev = (struct amdgpu_device *)kgd; +	if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { +		*mem = *adev->gmc.vm_fault_info; +		mb(); +		atomic_set(&adev->gmc.vm_fault_info_updated, 0); +	} +	return 0; +} +  /* Evict a userptr BO by stopping the queues if necessary   *   * Runs in MMU notifier, may be in RECLAIM_FS context. This means it @@ -1680,7 +1694,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,  		if (amdgpu_bo_reserve(bo, true))  			return -EAGAIN; -		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); +		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);  		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  		amdgpu_bo_unreserve(bo);  		if (ret) { @@ -1824,7 +1838,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  		if (mem->user_pages[0]) {  			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,  						     mem->user_pages); -			amdgpu_ttm_placement_from_domain(bo, mem->domain); +			amdgpu_bo_placement_from_domain(bo, mem->domain);  			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  			if (ret) {  				pr_err("%s: failed to validate BO\n", __func__); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index ca8bf1c9a98e..a028661d9e20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -32,7 +32,7 @@ struct amdgpu_atpx_functions {  	bool switch_start;  	bool switch_end;  	bool disp_connectors_mapping; -	bool disp_detetion_ports; +	bool disp_detection_ports;  };  struct amdgpu_atpx { @@ -162,7 +162,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas  	f->switch_start = mask & ATPX_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION_SUPPORTED;  	f->switch_end = mask & ATPX_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION_SUPPORTED;  	f->disp_connectors_mapping = mask & ATPX_GET_DISPLAY_CONNECTORS_MAPPING_SUPPORTED; -	f->disp_detetion_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED; +	f->disp_detection_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;  }  /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 19cfff31f2e1..3079ea8523c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -95,11 +95,17 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,  	r = amdgpu_bo_reserve(sobj, false);  	if (unlikely(r != 0))  		goto out_cleanup; -	r = amdgpu_bo_pin(sobj, sdomain, &saddr); +	r = amdgpu_bo_pin(sobj, sdomain); +	if (r) { +		amdgpu_bo_unreserve(sobj); +		goto out_cleanup; +	} +	r = amdgpu_ttm_alloc_gart(&sobj->tbo);  	amdgpu_bo_unreserve(sobj);  	if (r) {  		goto out_cleanup;  	} +	saddr = amdgpu_bo_gpu_offset(sobj);  	bp.domain = ddomain;  	r = amdgpu_bo_create(adev, &bp, &dobj);  	if (r) { @@ -108,11 +114,17 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned 
size,  	r = amdgpu_bo_reserve(dobj, false);  	if (unlikely(r != 0))  		goto out_cleanup; -	r = amdgpu_bo_pin(dobj, ddomain, &daddr); +	r = amdgpu_bo_pin(dobj, ddomain); +	if (r) { +		amdgpu_bo_unreserve(sobj); +		goto out_cleanup; +	} +	r = amdgpu_ttm_alloc_gart(&dobj->tbo);  	amdgpu_bo_unreserve(dobj);  	if (r) {  		goto out_cleanup;  	} +	daddr = amdgpu_bo_gpu_offset(dobj);  	if (adev->mman.buffer_funcs) {  		time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 92be7f6de197..d472a2c8399f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -35,92 +35,53 @@  #define AMDGPU_BO_LIST_MAX_PRIORITY	32u  #define AMDGPU_BO_LIST_NUM_BUCKETS	(AMDGPU_BO_LIST_MAX_PRIORITY + 1) -static int amdgpu_bo_list_set(struct amdgpu_device *adev, -				     struct drm_file *filp, -				     struct amdgpu_bo_list *list, -				     struct drm_amdgpu_bo_list_entry *info, -				     unsigned num_entries); +static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) +{ +	struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, +						   rhead); + +	kvfree(list); +} -static void amdgpu_bo_list_release_rcu(struct kref *ref) +static void amdgpu_bo_list_free(struct kref *ref)  { -	unsigned i;  	struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,  						   refcount); +	struct amdgpu_bo_list_entry *e; -	for (i = 0; i < list->num_entries; ++i) -		amdgpu_bo_unref(&list->array[i].robj); +	amdgpu_bo_list_for_each_entry(e, list) +		amdgpu_bo_unref(&e->robj); -	mutex_destroy(&list->lock); -	kvfree(list->array); -	kfree_rcu(list, rhead); +	call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);  } -static int amdgpu_bo_list_create(struct amdgpu_device *adev, -				 struct drm_file *filp, -				 struct drm_amdgpu_bo_list_entry *info, -				 unsigned num_entries, -				 int *id) +int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, +			  struct drm_amdgpu_bo_list_entry *info, +			  unsigned num_entries, struct amdgpu_bo_list **result)  { -	int r; -	struct amdgpu_fpriv *fpriv = filp->driver_priv; +	unsigned last_entry = 0, first_userptr = num_entries; +	struct amdgpu_bo_list_entry *array;  	struct amdgpu_bo_list *list; +	uint64_t total_size = 0; +	size_t size; +	unsigned i; +	int r; + +	if (num_entries > SIZE_MAX / sizeof(struct amdgpu_bo_list_entry)) +		return -EINVAL; -	list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); +	size = sizeof(struct amdgpu_bo_list); +	size += num_entries * sizeof(struct amdgpu_bo_list_entry); +	list = kvmalloc(size, GFP_KERNEL);  	if (!list)  		return -ENOMEM; -	/* initialize bo list*/ -	mutex_init(&list->lock);  	kref_init(&list->refcount); -	r = amdgpu_bo_list_set(adev, filp, list, info, num_entries); -	if (r) { -		kfree(list); -		return r; -	} - -	/* idr alloc should be called only after initialization of bo list. 
*/ -	mutex_lock(&fpriv->bo_list_lock); -	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); -	mutex_unlock(&fpriv->bo_list_lock); -	if (r < 0) { -		amdgpu_bo_list_free(list); -		return r; -	} -	*id = r; - -	return 0; -} - -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) -{ -	struct amdgpu_bo_list *list; - -	mutex_lock(&fpriv->bo_list_lock); -	list = idr_remove(&fpriv->bo_list_handles, id); -	mutex_unlock(&fpriv->bo_list_lock); -	if (list) -		kref_put(&list->refcount, amdgpu_bo_list_release_rcu); -} - -static int amdgpu_bo_list_set(struct amdgpu_device *adev, -				     struct drm_file *filp, -				     struct amdgpu_bo_list *list, -				     struct drm_amdgpu_bo_list_entry *info, -				     unsigned num_entries) -{ -	struct amdgpu_bo_list_entry *array; -	struct amdgpu_bo *gds_obj = adev->gds.gds_gfx_bo; -	struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo; -	struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo; - -	unsigned last_entry = 0, first_userptr = num_entries; -	unsigned i; -	int r; -	unsigned long total_size = 0; +	list->gds_obj = adev->gds.gds_gfx_bo; +	list->gws_obj = adev->gds.gws_gfx_bo; +	list->oa_obj = adev->gds.oa_gfx_bo; -	array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL); -	if (!array) -		return -ENOMEM; +	array = amdgpu_bo_list_array_entry(list, 0);  	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));  	for (i = 0; i < num_entries; ++i) { @@ -157,59 +118,56 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,  		entry->tv.shared = !entry->robj->prime_shared_count;  		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) -			gds_obj = entry->robj; +			list->gds_obj = entry->robj;  		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) -			gws_obj = entry->robj; +			list->gws_obj = entry->robj;  		if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) -			oa_obj = entry->robj; +			list->oa_obj = entry->robj;  		total_size += amdgpu_bo_size(entry->robj);  		trace_amdgpu_bo_list_set(list, entry->robj);  	} -	for (i = 0; i < list->num_entries; ++i) -		amdgpu_bo_unref(&list->array[i].robj); - -	kvfree(list->array); - -	list->gds_obj = gds_obj; -	list->gws_obj = gws_obj; -	list->oa_obj = oa_obj;  	list->first_userptr = first_userptr; -	list->array = array;  	list->num_entries = num_entries;  	trace_amdgpu_cs_bo_status(list->num_entries, total_size); + +	*result = list;  	return 0;  error_free:  	while (i--)  		amdgpu_bo_unref(&array[i].robj); -	kvfree(array); +	kvfree(list);  	return r; +  } -struct amdgpu_bo_list * -amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id) +static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)  { -	struct amdgpu_bo_list *result; +	struct amdgpu_bo_list *list; + +	mutex_lock(&fpriv->bo_list_lock); +	list = idr_remove(&fpriv->bo_list_handles, id); +	mutex_unlock(&fpriv->bo_list_lock); +	if (list) +		kref_put(&list->refcount, amdgpu_bo_list_free); +} +int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, +		       struct amdgpu_bo_list **result) +{  	rcu_read_lock(); -	result = idr_find(&fpriv->bo_list_handles, id); +	*result = idr_find(&fpriv->bo_list_handles, id); -	if (result) { -		if (kref_get_unless_zero(&result->refcount)) { -			rcu_read_unlock(); -			mutex_lock(&result->lock); -		} else { -			rcu_read_unlock(); -			result = NULL; -		} -	} else { +	if (*result && kref_get_unless_zero(&(*result)->refcount)) {  		rcu_read_unlock(); +		return 0;  	} -	return result; +	rcu_read_unlock(); +	return -ENOENT;  }  void 
amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, @@ -220,6 +178,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,  	 * concatenated in descending order.  	 */  	struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS]; +	struct amdgpu_bo_list_entry *e;  	unsigned i;  	for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++) @@ -230,14 +189,13 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,  	 * in the list, the sort mustn't change the ordering of buffers  	 * with the same priority, i.e. it must be stable.  	 */ -	for (i = 0; i < list->num_entries; i++) { -		unsigned priority = list->array[i].priority; +	amdgpu_bo_list_for_each_entry(e, list) { +		unsigned priority = e->priority; -		if (!list->array[i].robj->parent) -			list_add_tail(&list->array[i].tv.head, -				      &bucket[priority]); +		if (!e->robj->parent) +			list_add_tail(&e->tv.head, &bucket[priority]); -		list->array[i].user_pages = NULL; +		e->user_pages = NULL;  	}  	/* Connect the sorted buckets in the output list. */ @@ -247,71 +205,82 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,  void amdgpu_bo_list_put(struct amdgpu_bo_list *list)  { -	mutex_unlock(&list->lock); -	kref_put(&list->refcount, amdgpu_bo_list_release_rcu); -} - -void amdgpu_bo_list_free(struct amdgpu_bo_list *list) -{ -	unsigned i; - -	for (i = 0; i < list->num_entries; ++i) -		amdgpu_bo_unref(&list->array[i].robj); - -	mutex_destroy(&list->lock); -	kvfree(list->array); -	kfree(list); +	kref_put(&list->refcount, amdgpu_bo_list_free);  } -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, -				struct drm_file *filp) +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, +				      struct drm_amdgpu_bo_list_entry **info_param)  { +	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);  	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry); - -	struct amdgpu_device *adev = dev->dev_private; -	struct amdgpu_fpriv *fpriv = filp->driver_priv; -	union drm_amdgpu_bo_list *args = data; -	uint32_t handle = args->in.list_handle; -	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr); -  	struct drm_amdgpu_bo_list_entry *info; -	struct amdgpu_bo_list *list; -  	int r; -	info = kvmalloc_array(args->in.bo_number, -			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL); +	info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);  	if (!info)  		return -ENOMEM;  	/* copy the handle array from userspace to a kernel buffer */  	r = -EFAULT; -	if (likely(info_size == args->in.bo_info_size)) { -		unsigned long bytes = args->in.bo_number * -			args->in.bo_info_size; +	if (likely(info_size == in->bo_info_size)) { +		unsigned long bytes = in->bo_number * +			in->bo_info_size;  		if (copy_from_user(info, uptr, bytes))  			goto error_free;  	} else { -		unsigned long bytes = min(args->in.bo_info_size, info_size); +		unsigned long bytes = min(in->bo_info_size, info_size);  		unsigned i; -		memset(info, 0, args->in.bo_number * info_size); -		for (i = 0; i < args->in.bo_number; ++i) { +		memset(info, 0, in->bo_number * info_size); +		for (i = 0; i < in->bo_number; ++i) {  			if (copy_from_user(&info[i], uptr, bytes))  				goto error_free; -			uptr += args->in.bo_info_size; +			uptr += in->bo_info_size;  		}  	} +	*info_param = info; +	return 0; + +error_free: +	kvfree(info); +	return r; +} + +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, +				struct drm_file *filp) +{ +	struct amdgpu_device *adev = dev->dev_private; +	struct amdgpu_fpriv *fpriv = filp->driver_priv; +	union 
drm_amdgpu_bo_list *args = data; +	uint32_t handle = args->in.list_handle; +	struct drm_amdgpu_bo_list_entry *info = NULL; +	struct amdgpu_bo_list *list, *old; +	int r; + +	r = amdgpu_bo_create_list_entry_array(&args->in, &info); +	if (r) +		goto error_free; +  	switch (args->in.operation) {  	case AMDGPU_BO_LIST_OP_CREATE:  		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, -					  &handle); +					  &list);  		if (r)  			goto error_free; + +		mutex_lock(&fpriv->bo_list_lock); +		r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); +		mutex_unlock(&fpriv->bo_list_lock); +		if (r < 0) { +			amdgpu_bo_list_put(list); +			return r; +		} + +		handle = r;  		break;  	case AMDGPU_BO_LIST_OP_DESTROY: @@ -320,17 +289,22 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,  		break;  	case AMDGPU_BO_LIST_OP_UPDATE: -		r = -ENOENT; -		list = amdgpu_bo_list_get(fpriv, handle); -		if (!list) +		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, +					  &list); +		if (r)  			goto error_free; -		r = amdgpu_bo_list_set(adev, filp, list, info, -					      args->in.bo_number); -		amdgpu_bo_list_put(list); -		if (r) +		mutex_lock(&fpriv->bo_list_lock); +		old = idr_replace(&fpriv->bo_list_handles, list, handle); +		mutex_unlock(&fpriv->bo_list_lock); + +		if (IS_ERR(old)) { +			amdgpu_bo_list_put(list); +			r = PTR_ERR(old);  			goto error_free; +		} +		amdgpu_bo_list_put(old);  		break;  	default: @@ -345,6 +319,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,  	return 0;  error_free: -	kvfree(info); +	if (info) +		kvfree(info);  	return r;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h new file mode 100644 index 000000000000..61b089768e1c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -0,0 +1,85 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_BO_LIST_H__ +#define __AMDGPU_BO_LIST_H__ + +#include <drm/ttm/ttm_execbuf_util.h> +#include <drm/amdgpu_drm.h> + +struct amdgpu_device; +struct amdgpu_bo; +struct amdgpu_bo_va; +struct amdgpu_fpriv; + +struct amdgpu_bo_list_entry { +	struct amdgpu_bo		*robj; +	struct ttm_validate_buffer	tv; +	struct amdgpu_bo_va		*bo_va; +	uint32_t			priority; +	struct page			**user_pages; +	int				user_invalidated; +}; + +struct amdgpu_bo_list { +	struct rcu_head rhead; +	struct kref refcount; +	struct amdgpu_bo *gds_obj; +	struct amdgpu_bo *gws_obj; +	struct amdgpu_bo *oa_obj; +	unsigned first_userptr; +	unsigned num_entries; +}; + +int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, +		       struct amdgpu_bo_list **result); +void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, +			     struct list_head *validated); +void amdgpu_bo_list_put(struct amdgpu_bo_list *list); +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, +				      struct drm_amdgpu_bo_list_entry **info_param); + +int amdgpu_bo_list_create(struct amdgpu_device *adev, +				 struct drm_file *filp, +				 struct drm_amdgpu_bo_list_entry *info, +				 unsigned num_entries, +				 struct amdgpu_bo_list **list); + +static inline struct amdgpu_bo_list_entry * +amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index) +{ +	struct amdgpu_bo_list_entry *array = (void *)&list[1]; + +	return &array[index]; +} + +#define amdgpu_bo_list_for_each_entry(e, list) \ +	for (e = amdgpu_bo_list_array_entry(list, 0); \ +	     e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \ +	     ++e) + +#define amdgpu_bo_list_for_each_userptr_entry(e, list) \ +	for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \ +	     e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \ +	     ++e) + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index e950730f1933..693ec5ea4950 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -314,17 +314,17 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,  					(adev->pdev->revision == 0x81) ||  					(adev->pdev->device == 0x665f)) {  					info->is_kicker = true; -					strcpy(fw_name, "radeon/bonaire_k_smc.bin"); +					strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");  				} else { -					strcpy(fw_name, "radeon/bonaire_smc.bin"); +					strcpy(fw_name, "amdgpu/bonaire_smc.bin");  				}  				break;  			case CHIP_HAWAII:  				if (adev->pdev->revision == 0x80) {  					info->is_kicker = true; -					strcpy(fw_name, "radeon/hawaii_k_smc.bin"); +					strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");  				} else { -					strcpy(fw_name, "radeon/hawaii_smc.bin"); +					strcpy(fw_name, "amdgpu/hawaii_smc.bin");  				}  				break;  			case CHIP_TOPAZ: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 8e66851eb427..c770d73352a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -212,30 +212,21 @@ static void  amdgpu_connector_update_scratch_regs(struct drm_connector *connector,  				      enum drm_connector_status status)  { -	struct drm_encoder *best_encoder = NULL; -	struct drm_encoder *encoder = NULL; +	struct drm_encoder *best_encoder; +	struct drm_encoder *encoder;  	const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;  	bool connected;  	int i;  	best_encoder = 
connector_funcs->best_encoder(connector); -	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { -		if (connector->encoder_ids[i] == 0) -			break; - -		encoder = drm_encoder_find(connector->dev, NULL, -					connector->encoder_ids[i]); -		if (!encoder) -			continue; - +	drm_connector_for_each_possible_encoder(connector, encoder, i) {  		if ((encoder == best_encoder) && (status == connector_status_connected))  			connected = true;  		else  			connected = false;  		amdgpu_atombios_encoder_set_bios_scratch_regs(connector, encoder, connected); -  	}  } @@ -246,17 +237,11 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,  	struct drm_encoder *encoder;  	int i; -	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { -		if (connector->encoder_ids[i] == 0) -			break; -		encoder = drm_encoder_find(connector->dev, NULL, -					connector->encoder_ids[i]); -		if (!encoder) -			continue; - +	drm_connector_for_each_possible_encoder(connector, encoder, i) {  		if (encoder->encoder_type == encoder_type)  			return encoder;  	} +  	return NULL;  } @@ -349,22 +334,24 @@ static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)  	int ret;  	if (amdgpu_connector->edid) { -		drm_mode_connector_update_edid_property(connector, amdgpu_connector->edid); +		drm_connector_update_edid_property(connector, amdgpu_connector->edid);  		ret = drm_add_edid_modes(connector, amdgpu_connector->edid);  		return ret;  	} -	drm_mode_connector_update_edid_property(connector, NULL); +	drm_connector_update_edid_property(connector, NULL);  	return 0;  }  static struct drm_encoder *  amdgpu_connector_best_single_encoder(struct drm_connector *connector)  { -	int enc_id = connector->encoder_ids[0]; +	struct drm_encoder *encoder; +	int i; + +	/* pick the first one */ +	drm_connector_for_each_possible_encoder(connector, encoder, i) +		return encoder; -	/* pick the encoder ids */ -	if (enc_id) -		return drm_encoder_find(connector->dev, NULL, enc_id);  	return NULL;  } @@ -985,9 +972,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)  	struct drm_device *dev = connector->dev;  	struct amdgpu_device *adev = dev->dev_private;  	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); -	struct drm_encoder *encoder = NULL;  	const struct drm_encoder_helper_funcs *encoder_funcs; -	int i, r; +	int r;  	enum drm_connector_status ret = connector_status_disconnected;  	bool dret = false, broken_edid = false; @@ -1077,14 +1063,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)  	/* find analog encoder */  	if (amdgpu_connector->dac_load_detect) { -		for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { -			if (connector->encoder_ids[i] == 0) -				break; - -			encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]); -			if (!encoder) -				continue; +		struct drm_encoder *encoder; +		int i; +		drm_connector_for_each_possible_encoder(connector, encoder, i) {  			if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&  			    encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)  				continue; @@ -1132,18 +1114,11 @@ exit:  static struct drm_encoder *  amdgpu_connector_dvi_encoder(struct drm_connector *connector)  { -	int enc_id = connector->encoder_ids[0];  	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);  	struct drm_encoder *encoder;  	int i; -	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { -		if (connector->encoder_ids[i] == 0) -			break; - -		encoder = drm_encoder_find(connector->dev, NULL, 
connector->encoder_ids[i]); -		if (!encoder) -			continue; +	drm_connector_for_each_possible_encoder(connector, encoder, i) {  		if (amdgpu_connector->use_digital == true) {  			if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)  				return encoder; @@ -1158,8 +1133,9 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)  	/* then check use digitial */  	/* pick the first one */ -	if (enc_id) -		return drm_encoder_find(connector->dev, NULL, enc_id); +	drm_connector_for_each_possible_encoder(connector, encoder, i) +		return encoder; +  	return NULL;  } @@ -1296,15 +1272,7 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn  	struct amdgpu_encoder *amdgpu_encoder;  	int i; -	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { -		if (connector->encoder_ids[i] == 0) -			break; - -		encoder = drm_encoder_find(connector->dev, NULL, -					connector->encoder_ids[i]); -		if (!encoder) -			continue; - +	drm_connector_for_each_possible_encoder(connector, encoder, i) {  		amdgpu_encoder = to_amdgpu_encoder(encoder);  		switch (amdgpu_encoder->encoder_id) { @@ -1326,14 +1294,7 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)  	int i;  	bool found = false; -	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { -		if (connector->encoder_ids[i] == 0) -			break; -		encoder = drm_encoder_find(connector->dev, NULL, -					connector->encoder_ids[i]); -		if (!encoder) -			continue; - +	drm_connector_for_each_possible_encoder(connector, encoder, i) {  		amdgpu_encoder = to_amdgpu_encoder(encoder);  		if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)  			found = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9c85a90be293..502b94fb116a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -31,6 +31,7 @@  #include <drm/drm_syncobj.h>  #include "amdgpu.h"  #include "amdgpu_trace.h" +#include "amdgpu_gmc.h"  static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,  				      struct drm_amdgpu_cs_chunk_fence *data, @@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,  	return 0;  } -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p, +				      struct drm_amdgpu_bo_list_in *data) +{ +	int r; +	struct drm_amdgpu_bo_list_entry *info = NULL; + +	r = amdgpu_bo_create_list_entry_array(data, &info); +	if (r) +		return r; + +	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, +				  &p->bo_list); +	if (r) +		goto error_free; + +	kvfree(info); +	return 0; + +error_free: +	if (info) +		kvfree(info); + +	return r; +} + +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)  {  	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;  	struct amdgpu_vm *vm = &fpriv->vm; -	union drm_amdgpu_cs *cs = data;  	uint64_t *chunk_array_user;  	uint64_t *chunk_array;  	unsigned size, num_ibs = 0; @@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)  			break; +		case AMDGPU_CHUNK_ID_BO_HANDLES: +			size = sizeof(struct drm_amdgpu_bo_list_in); +			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) { +				ret = -EINVAL; +				goto free_partial_kdata; +			} + +			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata); +			if (ret) +				goto free_partial_kdata; + +			break; +  		case AMDGPU_CHUNK_ID_DEPENDENCIES:  		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:  		
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: @@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)  	if (p->uf_entry.robj)  		p->job->uf_addr = uf_offset;  	kfree(chunk_array); + +	/* Use this opportunity to fill in task info for the vm */ +	amdgpu_vm_set_task_info(vm); +  	return 0;  free_all_kdata: @@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,  		return;  	} -	total_vram = adev->gmc.real_vram_size - adev->vram_pin_size; +	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);  	used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);  	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; @@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,  	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);  	/* Do the same for visible VRAM if half of it is free */ -	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) { +	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {  		u64 total_vis_vram = adev->gmc.visible_vram_size;  		u64 used_vis_vram =  			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,  	 * to move it. Don't move anything if the threshold is zero.  	 */  	if (p->bytes_moved < p->bytes_moved_threshold) { -		if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && +		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&  		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {  			/* And don't move a CPU_ACCESS_REQUIRED BO to limited  			 * visible VRAM if we've depleted our allowance to do @@ -377,11 +419,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,  	}  retry: -	amdgpu_ttm_placement_from_domain(bo, domain); +	amdgpu_bo_placement_from_domain(bo, domain);  	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  	p->bytes_moved += ctx.bytes_moved; -	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && +	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&  	    amdgpu_bo_in_cpu_visible_vram(bo))  		p->bytes_moved_vis += ctx.bytes_moved; @@ -434,9 +476,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,  		/* Good we can try to move this BO somewhere else */  		update_bytes_moved_vis = -			adev->gmc.visible_vram_size < adev->gmc.real_vram_size && -			amdgpu_bo_in_cpu_visible_vram(bo); -		amdgpu_ttm_placement_from_domain(bo, other); +				!amdgpu_gmc_vram_full_visible(&adev->gmc) && +				amdgpu_bo_in_cpu_visible_vram(bo); +		amdgpu_bo_placement_from_domain(bo, other);  		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  		p->bytes_moved += ctx.bytes_moved;  		if (update_bytes_moved_vis) @@ -490,8 +532,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,  		/* Check if we have user pages and nobody bound the BO already */  		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&  		    lobj->user_pages) { -			amdgpu_ttm_placement_from_domain(bo, -							 AMDGPU_GEM_DOMAIN_CPU); +			amdgpu_bo_placement_from_domain(bo, +							AMDGPU_GEM_DOMAIN_CPU);  			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  			if (r)  				return r; @@ -519,23 +561,38 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  				union drm_amdgpu_cs *cs)  {  	struct amdgpu_fpriv *fpriv = p->filp->driver_priv; +	struct amdgpu_vm *vm = &fpriv->vm;  	struct amdgpu_bo_list_entry *e;  	struct list_head duplicates; -	unsigned i, tries = 10;  	struct amdgpu_bo *gds;  	struct amdgpu_bo *gws;  	
struct amdgpu_bo *oa; +	unsigned tries = 10;  	int r;  	INIT_LIST_HEAD(&p->validated); -	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); -	if (p->bo_list) { -		amdgpu_bo_list_get_list(p->bo_list, &p->validated); -		if (p->bo_list->first_userptr != p->bo_list->num_entries) -			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); +	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ +	if (cs->in.bo_list_handle) { +		if (p->bo_list) +			return -EINVAL; + +		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle, +				       &p->bo_list); +		if (r) +			return r; +	} else if (!p->bo_list) { +		/* Create a empty bo_list when no handle is provided */ +		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0, +					  &p->bo_list); +		if (r) +			return r;  	} +	amdgpu_bo_list_get_list(p->bo_list, &p->validated); +	if (p->bo_list->first_userptr != p->bo_list->num_entries) +		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); +  	INIT_LIST_HEAD(&duplicates);  	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); @@ -544,7 +601,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  	while (1) {  		struct list_head need_pages; -		unsigned i;  		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,  					   &duplicates); @@ -554,17 +610,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  			goto error_free_pages;  		} -		/* Without a BO list we don't have userptr BOs */ -		if (!p->bo_list) -			break; -  		INIT_LIST_HEAD(&need_pages); -		for (i = p->bo_list->first_userptr; -		     i < p->bo_list->num_entries; ++i) { -			struct amdgpu_bo *bo; - -			e = &p->bo_list->array[i]; -			bo = e->robj; +		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { +			struct amdgpu_bo *bo = e->robj;  			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,  				 &e->user_invalidated) && e->user_pages) { @@ -656,23 +704,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,  	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,  				     p->bytes_moved_vis); -	if (p->bo_list) { -		struct amdgpu_vm *vm = &fpriv->vm; -		unsigned i; +	gds = p->bo_list->gds_obj; +	gws = p->bo_list->gws_obj; +	oa = p->bo_list->oa_obj; -		gds = p->bo_list->gds_obj; -		gws = p->bo_list->gws_obj; -		oa = p->bo_list->oa_obj; -		for (i = 0; i < p->bo_list->num_entries; i++) { -			struct amdgpu_bo *bo = p->bo_list->array[i].robj; - -			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); -		} -	} else { -		gds = p->adev->gds.gds_gfx_bo; -		gws = p->adev->gds.gws_gfx_bo; -		oa = p->adev->gds.oa_gfx_bo; -	} +	amdgpu_bo_list_for_each_entry(e, p->bo_list) +		e->bo_va = amdgpu_vm_bo_find(vm, e->robj);  	if (gds) {  		p->job->gds_base = amdgpu_bo_gpu_offset(gds); @@ -700,18 +737,13 @@ error_validate:  error_free_pages: -	if (p->bo_list) { -		for (i = p->bo_list->first_userptr; -		     i < p->bo_list->num_entries; ++i) { -			e = &p->bo_list->array[i]; - -			if (!e->user_pages) -				continue; +	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { +		if (!e->user_pages) +			continue; -			release_pages(e->user_pages, -				      e->robj->tbo.ttm->num_pages); -			kvfree(e->user_pages); -		} +		release_pages(e->user_pages, +			      e->robj->tbo.ttm->num_pages); +		kvfree(e->user_pages);  	}  	return r; @@ -773,12 +805,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,  static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)  { -	struct amdgpu_device *adev = p->adev;  	struct amdgpu_fpriv *fpriv = p->filp->driver_priv; +	struct 
amdgpu_device *adev = p->adev;  	struct amdgpu_vm *vm = &fpriv->vm; +	struct amdgpu_bo_list_entry *e;  	struct amdgpu_bo_va *bo_va;  	struct amdgpu_bo *bo; -	int i, r; +	int r;  	r = amdgpu_vm_clear_freed(adev, vm, NULL);  	if (r) @@ -808,29 +841,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)  			return r;  	} -	if (p->bo_list) { -		for (i = 0; i < p->bo_list->num_entries; i++) { -			struct dma_fence *f; - -			/* ignore duplicates */ -			bo = p->bo_list->array[i].robj; -			if (!bo) -				continue; +	amdgpu_bo_list_for_each_entry(e, p->bo_list) { +		struct dma_fence *f; -			bo_va = p->bo_list->array[i].bo_va; -			if (bo_va == NULL) -				continue; +		/* ignore duplicates */ +		bo = e->robj; +		if (!bo) +			continue; -			r = amdgpu_vm_bo_update(adev, bo_va, false); -			if (r) -				return r; +		bo_va = e->bo_va; +		if (bo_va == NULL) +			continue; -			f = bo_va->last_pt_update; -			r = amdgpu_sync_fence(adev, &p->job->sync, f, false); -			if (r) -				return r; -		} +		r = amdgpu_vm_bo_update(adev, bo_va, false); +		if (r) +			return r; +		f = bo_va->last_pt_update; +		r = amdgpu_sync_fence(adev, &p->job->sync, f, false); +		if (r) +			return r;  	}  	r = amdgpu_vm_handle_moved(adev, vm); @@ -845,15 +875,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)  	if (r)  		return r; -	if (amdgpu_vm_debug && p->bo_list) { +	if (amdgpu_vm_debug) {  		/* Invalidate all BOs to test for userspace bugs */ -		for (i = 0; i < p->bo_list->num_entries; i++) { +		amdgpu_bo_list_for_each_entry(e, p->bo_list) {  			/* ignore duplicates */ -			bo = p->bo_list->array[i].robj; -			if (!bo) +			if (!e->robj)  				continue; -			amdgpu_vm_bo_invalidate(adev, bo, false); +			amdgpu_vm_bo_invalidate(adev, e->robj, false);  		}  	} @@ -865,11 +894,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,  {  	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;  	struct amdgpu_vm *vm = &fpriv->vm; -	struct amdgpu_ring *ring = p->job->ring; +	struct amdgpu_ring *ring = p->ring;  	int r;  	/* Only for UVD/VCE VM emulation */ -	if (p->job->ring->funcs->parse_cs) { +	if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {  		unsigned i, j;  		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { @@ -910,12 +939,20 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,  			offset = m->start * AMDGPU_GPU_PAGE_SIZE;  			kptr += va_start - offset; -			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); -			amdgpu_bo_kunmap(aobj); - -			r = amdgpu_ring_parse_cs(ring, p, j); -			if (r) -				return r; +			if (p->ring->funcs->parse_cs) { +				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); +				amdgpu_bo_kunmap(aobj); + +				r = amdgpu_ring_parse_cs(ring, p, j); +				if (r) +					return r; +			} else { +				ib->ptr = (uint32_t *)kptr; +				r = amdgpu_ring_patch_cs_in_place(ring, p, j); +				amdgpu_bo_kunmap(aobj); +				if (r) +					return r; +			}  			j++;  		} @@ -983,10 +1020,10 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,  			}  		} -		if (parser->job->ring && parser->job->ring != ring) +		if (parser->ring && parser->ring != ring)  			return -EINVAL; -		parser->job->ring = ring; +		parser->ring = ring;  		r =  amdgpu_ib_get(adev, vm,  					ring->funcs->parse_cs ? 
chunk_ib->ib_bytes : 0, @@ -1005,11 +1042,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,  	/* UVD & VCE fw doesn't support user fences */  	if (parser->job->uf_addr && ( -	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD || -	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) +	    parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD || +	    parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))  		return -EINVAL; -	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); +	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);  }  static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1160,31 +1197,30 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  			    union drm_amdgpu_cs *cs)  { -	struct amdgpu_ring *ring = p->job->ring; +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv; +	struct amdgpu_ring *ring = p->ring;  	struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity; +	enum drm_sched_priority priority; +	struct amdgpu_bo_list_entry *e;  	struct amdgpu_job *job; -	unsigned i;  	uint64_t seq;  	int r;  	amdgpu_mn_lock(p->mn); -	if (p->bo_list) { -		for (i = p->bo_list->first_userptr; -		     i < p->bo_list->num_entries; ++i) { -			struct amdgpu_bo *bo = p->bo_list->array[i].robj; - -			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { -				amdgpu_mn_unlock(p->mn); -				return -ERESTARTSYS; -			} +	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { +		struct amdgpu_bo *bo = e->robj; + +		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { +			amdgpu_mn_unlock(p->mn); +			return -ERESTARTSYS;  		}  	}  	job = p->job;  	p->job = NULL; -	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp); +	r = drm_sched_job_init(&job->base, entity, p->filp);  	if (r) {  		amdgpu_job_free(job);  		amdgpu_mn_unlock(p->mn); @@ -1192,7 +1228,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  	}  	job->owner = p->filp; -	job->fence_ctx = entity->fence_context;  	p->fence = dma_fence_get(&job->base.s_fence->finished);  	r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); @@ -1210,11 +1245,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  	job->uf_sequence = seq;  	amdgpu_job_free_resources(job); -	amdgpu_ring_priority_get(job->ring, job->base.s_priority);  	trace_amdgpu_cs_ioctl(job); +	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket); +	priority = job->base.s_priority;  	drm_sched_entity_push_job(&job->base, entity); +	ring = to_amdgpu_ring(entity->rq->sched); +	amdgpu_ring_priority_get(ring, priority); +  	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);  	amdgpu_mn_unlock(p->mn); @@ -1605,7 +1644,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,  	if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {  		(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; -		amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); +		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);  		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);  		if (r)  			return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c5bb36275e93..df6965761046 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -90,8 +90,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,  		if (ring == &adev->gfx.kiq.ring)  			continue; -		r = drm_sched_entity_init(&ring->sched, 
&ctx->rings[i].entity, -					  rq, &ctx->guilty); +		r = drm_sched_entity_init(&ctx->rings[i].entity, +					  &rq, 1, &ctx->guilty);  		if (r)  			goto failed;  	} @@ -104,8 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,  failed:  	for (j = 0; j < i; j++) -		drm_sched_entity_fini(&adev->rings[j]->sched, -				      &ctx->rings[j].entity); +		drm_sched_entity_destroy(&ctx->rings[j].entity);  	kfree(ctx->fences);  	ctx->fences = NULL;  	return r; @@ -178,8 +177,7 @@ static void amdgpu_ctx_do_release(struct kref *ref)  		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)  			continue; -		drm_sched_entity_fini(&ctx->adev->rings[i]->sched, -			&ctx->rings[i].entity); +		drm_sched_entity_destroy(&ctx->rings[i].entity);  	}  	amdgpu_ctx_fini(ref); @@ -444,34 +442,36 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)  	idr_init(&mgr->ctx_handles);  } -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) +void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)  {  	struct amdgpu_ctx *ctx;  	struct idr *idp;  	uint32_t id, i; +	long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;  	idp = &mgr->ctx_handles; +	mutex_lock(&mgr->lock);  	idr_for_each_entry(idp, ctx, id) { -		if (!ctx->adev) +		if (!ctx->adev) { +			mutex_unlock(&mgr->lock);  			return; +		}  		for (i = 0; i < ctx->adev->num_rings; i++) {  			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)  				continue; -			if (kref_read(&ctx->refcount) == 1) -				drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, -						  &ctx->rings[i].entity); -			else -				DRM_ERROR("ctx %p is still alive\n", ctx); +			max_wait = drm_sched_entity_flush(&ctx->rings[i].entity, +							  max_wait);  		}  	} +	mutex_unlock(&mgr->lock);  } -void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr) +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)  {  	struct amdgpu_ctx *ctx;  	struct idr *idp; @@ -490,8 +490,7 @@ void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)  				continue;  			if (kref_read(&ctx->refcount) == 1) -				drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched, -					&ctx->rings[i].entity); +				drm_sched_entity_fini(&ctx->rings[i].entity);  			else  				DRM_ERROR("ctx %p is still alive\n", ctx);  		} @@ -504,7 +503,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)  	struct idr *idp;  	uint32_t id; -	amdgpu_ctx_mgr_entity_cleanup(mgr); +	amdgpu_ctx_mgr_entity_fini(mgr);  	idp = &mgr->ctx_handles; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2c5f093e79e3..1e66dfd0e39c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -25,6 +25,7 @@   *          Alex Deucher   *          Jerome Glisse   */ +#include <linux/power_supply.h>  #include <linux/kthread.h>  #include <linux/console.h>  #include <linux/slab.h> @@ -675,17 +676,15 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,  }  /** - * amdgpu_device_gart_location - try to find GTT location + * amdgpu_device_gart_location - try to find GART location   *   * @adev: amdgpu device structure holding all necessary informations   * @mc: memory controller structure holding memory informations   * - * Function will place try to place GTT before or after VRAM. + * Function will place try to place GART before or after VRAM.   * - * If GTT size is bigger than space left then we ajust GTT size. + * If GART size is bigger than space left then we ajust GART size.   * Thus function will never fails. 
- * - * FIXME: when reducing GTT size align new size on power of 2.   */  void amdgpu_device_gart_location(struct amdgpu_device *adev,  				 struct amdgpu_gmc *mc) @@ -698,13 +697,13 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,  	size_bf = mc->vram_start;  	if (size_bf > size_af) {  		if (mc->gart_size > size_bf) { -			dev_warn(adev->dev, "limiting GTT\n"); +			dev_warn(adev->dev, "limiting GART\n");  			mc->gart_size = size_bf;  		}  		mc->gart_start = 0;  	} else {  		if (mc->gart_size > size_af) { -			dev_warn(adev->dev, "limiting GTT\n"); +			dev_warn(adev->dev, "limiting GART\n");  			mc->gart_size = size_af;  		}  		/* VCE doesn't like it when BOs cross a 4GB segment, so align @@ -713,7 +712,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,  		mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);  	}  	mc->gart_end = mc->gart_start + mc->gart_size - 1; -	dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n", +	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",  			mc->gart_size >> 20, mc->gart_start, mc->gart_end);  } @@ -1077,7 +1076,7 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {  /**   * amdgpu_device_ip_set_clockgating_state - set the CG state   * - * @adev: amdgpu_device pointer + * @dev: amdgpu_device pointer   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)   * @state: clockgating state (gate or ungate)   * @@ -1111,7 +1110,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,  /**   * amdgpu_device_ip_set_powergating_state - set the PG state   * - * @adev: amdgpu_device pointer + * @dev: amdgpu_device pointer   * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)   * @state: powergating state (gate or ungate)   * @@ -1222,7 +1221,7 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,   * amdgpu_device_ip_get_ip_block - get a hw IP pointer   *   * @adev: amdgpu_device pointer - * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) + * @type: Type of hardware IP (SMU, GFX, UVD, etc.)   *   * Returns a pointer to the hardware IP block structure   * if it exists for the asic, otherwise NULL. 
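For reference, the placement rule documented above boils down to the small decision sketched below. This is an illustrative fragment only, not driver code: the VRAM extent, the GART window size and the 48-bit MC address-space top are made-up values, and only the before/after-VRAM choice and the 4 GB alignment from the hunk above are reproduced.

	u64 vram_start = 0x0ULL;
	u64 vram_end   = 0xFFFFFFFFULL;			/* assume 4 GB of VRAM at the bottom */
	u64 gart_size  = 0x40000000ULL;			/* assume a 1 GB GART window */
	u64 size_bf    = vram_start;			/* free space before VRAM: none */
	u64 size_af    = 0xFFFFFFFFFFFFULL - vram_end;	/* free space after VRAM (assumed 48-bit MC) */
	u64 gart_start, gart_end;

	if (size_bf > size_af)
		gart_start = 0;						/* GART fits below VRAM */
	else
		gart_start = ALIGN(vram_end + 1, 0x100000000ULL);	/* 4 GB aligned, above VRAM */
	gart_end = gart_start + gart_size - 1;				/* here: 0x100000000 .. 0x13FFFFFFF */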
@@ -1708,10 +1707,6 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)  	if (amdgpu_emu_mode == 1)  		return 0; -	r = amdgpu_ib_ring_tests(adev); -	if (r) -		DRM_ERROR("ib ring test failed (%d).\n", r); -  	for (i = 0; i < adev->num_ip_blocks; i++) {  		if (!adev->ip_blocks[i].status.valid)  			continue; @@ -1731,17 +1726,34 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)  		}  	} -	if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) { -		/* enable gfx powergating */ -		amdgpu_device_ip_set_powergating_state(adev, -						       AMD_IP_BLOCK_TYPE_GFX, -						       AMD_PG_STATE_GATE); -		/* enable gfxoff */ -		amdgpu_device_ip_set_powergating_state(adev, -						       AMD_IP_BLOCK_TYPE_SMC, -						       AMD_PG_STATE_GATE); -	} +	return 0; +} +static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev) +{ +	int i = 0, r; + +	if (amdgpu_emu_mode == 1) +		return 0; + +	for (i = 0; i < adev->num_ip_blocks; i++) { +		if (!adev->ip_blocks[i].status.valid) +			continue; +		/* skip CG for VCE/UVD, it's handled specially */ +		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && +		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && +		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && +		    adev->ip_blocks[i].version->funcs->set_powergating_state) { +			/* enable powergating to save power */ +			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, +										     AMD_PG_STATE_GATE); +			if (r) { +				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", +					  adev->ip_blocks[i].version->funcs->name, r); +				return r; +			} +		} +	}  	return 0;  } @@ -1775,6 +1787,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)  		}  	} +	amdgpu_device_ip_late_set_cg_state(adev); +	amdgpu_device_ip_late_set_pg_state(adev); +  	queue_delayed_work(system_wq, &adev->late_init_work,  			   msecs_to_jiffies(AMDGPU_RESUME_MS)); @@ -1813,6 +1828,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)  					  adev->ip_blocks[i].version->funcs->name, r);  				return r;  			} +			if (adev->powerplay.pp_funcs->set_powergating_by_smu) +				amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);  			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);  			/* XXX handle errors */  			if (r) { @@ -1901,11 +1918,15 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)  {  	struct amdgpu_device *adev =  		container_of(work, struct amdgpu_device, late_init_work.work); -	amdgpu_device_ip_late_set_cg_state(adev); +	int r; + +	r = amdgpu_ib_ring_tests(adev); +	if (r) +		DRM_ERROR("ib ring test failed (%d).\n", r);  }  /** - * amdgpu_device_ip_suspend - run suspend for hardware IPs + * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)   *   * @adev: amdgpu_device pointer   * @@ -1915,18 +1936,60 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)   * in each IP into a state suitable for suspend.   * Returns 0 on success, negative error code on failure.   
*/ -int amdgpu_device_ip_suspend(struct amdgpu_device *adev) +static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)  {  	int i, r;  	if (amdgpu_sriov_vf(adev))  		amdgpu_virt_request_full_gpu(adev, false); -	/* ungate SMC block powergating */ -	if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) -		amdgpu_device_ip_set_powergating_state(adev, -						       AMD_IP_BLOCK_TYPE_SMC, -						       AMD_CG_STATE_UNGATE); +	for (i = adev->num_ip_blocks - 1; i >= 0; i--) { +		if (!adev->ip_blocks[i].status.valid) +			continue; +		/* displays are handled separately */ +		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { +			/* ungate blocks so that suspend can properly shut them down */ +			if (adev->ip_blocks[i].version->funcs->set_clockgating_state) { +				r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, +											     AMD_CG_STATE_UNGATE); +				if (r) { +					DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", +						  adev->ip_blocks[i].version->funcs->name, r); +				} +			} +			/* XXX handle errors */ +			r = adev->ip_blocks[i].version->funcs->suspend(adev); +			/* XXX handle errors */ +			if (r) { +				DRM_ERROR("suspend of IP block <%s> failed %d\n", +					  adev->ip_blocks[i].version->funcs->name, r); +			} +		} +	} + +	if (amdgpu_sriov_vf(adev)) +		amdgpu_virt_release_full_gpu(adev, false); + +	return 0; +} + +/** + * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) + * + * @adev: amdgpu_device pointer + * + * Main suspend function for hardware IPs.  The list of all the hardware + * IPs that make up the asic is walked, clockgating is disabled and the + * suspend callbacks are run.  suspend puts the hardware and software state + * in each IP into a state suitable for suspend. + * Returns 0 on success, negative error code on failure. + */ +static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) +{ +	int i, r; + +	if (amdgpu_sriov_vf(adev)) +		amdgpu_virt_request_full_gpu(adev, false);  	/* ungate SMC block first */  	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, @@ -1935,9 +1998,16 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)  		DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);  	} +	/* call smu to disable gfx off feature first when suspend */ +	if (adev->powerplay.pp_funcs->set_powergating_by_smu) +		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false); +  	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {  		if (!adev->ip_blocks[i].status.valid)  			continue; +		/* displays are handled in phase1 */ +		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) +			continue;  		/* ungate blocks so that suspend can properly shut them down */  		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&  			adev->ip_blocks[i].version->funcs->set_clockgating_state) { @@ -1963,6 +2033,29 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)  	return 0;  } +/** + * amdgpu_device_ip_suspend - run suspend for hardware IPs + * + * @adev: amdgpu_device pointer + * + * Main suspend function for hardware IPs.  The list of all the hardware + * IPs that make up the asic is walked, clockgating is disabled and the + * suspend callbacks are run.  suspend puts the hardware and software state + * in each IP into a state suitable for suspend. + * Returns 0 on success, negative error code on failure. 
+ */ +int amdgpu_device_ip_suspend(struct amdgpu_device *adev) +{ +	int r; + +	r = amdgpu_device_ip_suspend_phase1(adev); +	if (r) +		return r; +	r = amdgpu_device_ip_suspend_phase2(adev); + +	return r; +} +  static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)  {  	int i, r; @@ -1985,7 +2078,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)  				continue;  			r = block->version->funcs->hw_init(adev); -			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); +			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");  			if (r)  				return r;  		} @@ -2020,7 +2113,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)  				continue;  			r = block->version->funcs->hw_init(adev); -			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); +			DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");  			if (r)  				return r;  		} @@ -2181,7 +2274,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)  	case CHIP_VEGA10:  	case CHIP_VEGA12:  	case CHIP_VEGA20: -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) +#ifdef CONFIG_X86  	case CHIP_RAVEN:  #endif  		return amdgpu_dc != 0; @@ -2210,7 +2303,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)   * amdgpu_device_init - initialize the driver   *   * @adev: amdgpu_device pointer - * @pdev: drm dev pointer + * @ddev: drm dev pointer   * @pdev: pci dev pointer   * @flags: driver flags   * @@ -2301,6 +2394,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	INIT_DELAYED_WORK(&adev->late_init_work,  			  amdgpu_device_ip_late_init_func_handler); +	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; +  	/* Registers mapping */  	/* TODO: block userspace mapping of io register */  	if (adev->asic_type >= CHIP_BONAIRE) { @@ -2581,8 +2676,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)  /**   * amdgpu_device_suspend - initiate device suspend   * - * @pdev: drm dev pointer - * @state: suspend state + * @dev: drm dev pointer + * @suspend: suspend state + * @fbcon : notify the fbdev of suspend   *   * Puts the hw in the suspend state (all asics).   * Returns 0 for success or an error on failure. 
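A minimal sketch of the suspend ordering that this phase split enables, condensed from the amdgpu_device_suspend() hunk that follows (error handling and the display/KFD teardown are omitted):

	amdgpu_device_ip_suspend_phase1(adev);	/* quiesce the display IPs first */
	amdgpu_bo_evict_vram(adev);		/* migrate BOs out of VRAM */
	amdgpu_fence_driver_suspend(adev);
	amdgpu_device_ip_suspend_phase2(adev);	/* then suspend the remaining IPs */
	amdgpu_bo_evict_vram(adev);		/* second pass, catches the GART page table */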
@@ -2606,6 +2702,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)  	drm_kms_helper_poll_disable(dev); +	if (fbcon) +		amdgpu_fbdev_set_suspend(adev, 1); +  	if (!amdgpu_device_has_dc_support(adev)) {  		/* turn off display hw */  		drm_modeset_lock_all(dev); @@ -2613,44 +2712,46 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)  			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);  		}  		drm_modeset_unlock_all(dev); -	} - -	amdgpu_amdkfd_suspend(adev); - -	/* unpin the front buffers and cursors */ -	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { -		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); -		struct drm_framebuffer *fb = crtc->primary->fb; -		struct amdgpu_bo *robj; - -		if (amdgpu_crtc->cursor_bo) { -			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); -			r = amdgpu_bo_reserve(aobj, true); -			if (r == 0) { -				amdgpu_bo_unpin(aobj); -				amdgpu_bo_unreserve(aobj); +			/* unpin the front buffers and cursors */ +		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { +			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); +			struct drm_framebuffer *fb = crtc->primary->fb; +			struct amdgpu_bo *robj; + +			if (amdgpu_crtc->cursor_bo) { +				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); +				r = amdgpu_bo_reserve(aobj, true); +				if (r == 0) { +					amdgpu_bo_unpin(aobj); +					amdgpu_bo_unreserve(aobj); +				}  			} -		} -		if (fb == NULL || fb->obj[0] == NULL) { -			continue; -		} -		robj = gem_to_amdgpu_bo(fb->obj[0]); -		/* don't unpin kernel fb objects */ -		if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { -			r = amdgpu_bo_reserve(robj, true); -			if (r == 0) { -				amdgpu_bo_unpin(robj); -				amdgpu_bo_unreserve(robj); +			if (fb == NULL || fb->obj[0] == NULL) { +				continue; +			} +			robj = gem_to_amdgpu_bo(fb->obj[0]); +			/* don't unpin kernel fb objects */ +			if (!amdgpu_fbdev_robj_is_fb(adev, robj)) { +				r = amdgpu_bo_reserve(robj, true); +				if (r == 0) { +					amdgpu_bo_unpin(robj); +					amdgpu_bo_unreserve(robj); +				}  			}  		}  	} + +	amdgpu_amdkfd_suspend(adev); + +	r = amdgpu_device_ip_suspend_phase1(adev); +  	/* evict vram memory */  	amdgpu_bo_evict_vram(adev);  	amdgpu_fence_driver_suspend(adev); -	r = amdgpu_device_ip_suspend(adev); +	r = amdgpu_device_ip_suspend_phase2(adev);  	/* evict remaining vram memory  	 * This second call to evict vram is to evict the gart page table @@ -2669,18 +2770,15 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)  			DRM_ERROR("amdgpu asic reset failed\n");  	} -	if (fbcon) { -		console_lock(); -		amdgpu_fbdev_set_suspend(adev, 1); -		console_unlock(); -	}  	return 0;  }  /**   * amdgpu_device_resume - initiate device resume   * - * @pdev: drm dev pointer + * @dev: drm dev pointer + * @resume: resume state + * @fbcon : notify the fbdev of resume   *   * Bring the hw back to operating state (all asics).   * Returns 0 for success or an error on failure. 
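Several hunks further down repeat the same caller-side conversion for the amdgpu_bo_pin() interface change. A minimal sketch of the new pattern, assuming an already reserved buffer object abo and locals r and addr (error handling trimmed):

	/* amdgpu_bo_pin(bo, domain, &gpu_addr) used to return the address directly */
	r = amdgpu_bo_pin(abo, amdgpu_display_supported_domains(adev));	/* pin only selects the placement now */
	if (!r)
		r = amdgpu_ttm_alloc_gart(&abo->tbo);	/* bind a GART mapping where one is needed */
	if (!r)
		addr = amdgpu_bo_gpu_offset(abo);	/* the GPU address is queried separately */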
@@ -2696,15 +2794,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)  	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)  		return 0; -	if (fbcon) -		console_lock(); -  	if (resume) {  		pci_set_power_state(dev->pdev, PCI_D0);  		pci_restore_state(dev->pdev);  		r = pci_enable_device(dev->pdev);  		if (r) -			goto unlock; +			return r;  	}  	/* post card */ @@ -2717,29 +2812,30 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)  	r = amdgpu_device_ip_resume(adev);  	if (r) {  		DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r); -		goto unlock; +		return r;  	}  	amdgpu_fence_driver_resume(adev);  	r = amdgpu_device_ip_late_init(adev);  	if (r) -		goto unlock; - -	/* pin cursors */ -	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { -		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - -		if (amdgpu_crtc->cursor_bo) { -			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); -			r = amdgpu_bo_reserve(aobj, true); -			if (r == 0) { -				r = amdgpu_bo_pin(aobj, -						  AMDGPU_GEM_DOMAIN_VRAM, -						  &amdgpu_crtc->cursor_addr); -				if (r != 0) -					DRM_ERROR("Failed to pin cursor BO (%d)\n", r); -				amdgpu_bo_unreserve(aobj); +		return r; + +	if (!amdgpu_device_has_dc_support(adev)) { +		/* pin cursors */ +		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { +			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + +			if (amdgpu_crtc->cursor_bo) { +				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); +				r = amdgpu_bo_reserve(aobj, true); +				if (r == 0) { +					r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); +					if (r != 0) +						DRM_ERROR("Failed to pin cursor BO (%d)\n", r); +					amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj); +					amdgpu_bo_unreserve(aobj); +				}  			}  		}  	} @@ -2763,6 +2859,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)  			}  			drm_modeset_unlock_all(dev);  		} +		amdgpu_fbdev_set_suspend(adev, 0);  	}  	drm_kms_helper_poll_enable(dev); @@ -2786,15 +2883,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)  #ifdef CONFIG_PM  	dev->dev->power.disable_depth--;  #endif - -	if (fbcon) -		amdgpu_fbdev_set_suspend(adev, 0); - -unlock: -	if (fbcon) -		console_unlock(); - -	return r; +	return 0;  }  /** @@ -3019,7 +3108,7 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)  	long tmo;  	if (amdgpu_sriov_runtime(adev)) -		tmo = msecs_to_jiffies(amdgpu_lockup_timeout); +		tmo = msecs_to_jiffies(8000);  	else  		tmo = msecs_to_jiffies(100); @@ -3071,7 +3160,7 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)   * @adev: amdgpu device pointer   *   * attempt to do soft-reset or full-reset and reinitialize Asic - * return 0 means successed otherwise failed + * return 0 means succeeded otherwise failed   */  static int amdgpu_device_reset(struct amdgpu_device *adev)  { @@ -3146,9 +3235,10 @@ out:   * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf   *   * @adev: amdgpu device pointer + * @from_hypervisor: request from hypervisor   *   * do VF FLR and reinitialize Asic - * return 0 means successed otherwise failed + * return 0 means succeeded otherwise failed   */  static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,  				     bool from_hypervisor) @@ -3193,7 +3283,7 @@ error:   *   * @adev: amdgpu device pointer   * @job: which job trigger hang - * @force forces reset regardless of amdgpu_gpu_recovery + * @force: 
forces reset regardless of amdgpu_gpu_recovery   *   * Attempt to reset the GPU if it has hung (all asics).   * Returns 0 for success or an error on failure. @@ -3220,6 +3310,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  	atomic_inc(&adev->gpu_reset_counter);  	adev->in_gpu_reset = 1; +	/* Block kfd */ +	amdgpu_amdkfd_pre_reset(adev); +  	/* block TTM */  	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); @@ -3232,10 +3325,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  		kthread_park(ring->sched.thread); -		if (job && job->ring->idx != i) +		if (job && job->base.sched == &ring->sched)  			continue; -		drm_sched_hw_job_reset(&ring->sched, &job->base); +		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);  		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */  		amdgpu_fence_driver_force_completion(ring); @@ -3256,7 +3349,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  		 * or all rings (in the case @job is NULL)  		 * after above amdgpu_reset accomplished  		 */ -		if ((!job || job->ring->idx == i) && !r) +		if ((!job || job->base.sched == &ring->sched) && !r)  			drm_sched_job_recovery(&ring->sched);  		kthread_unpark(ring->sched.thread); @@ -3273,9 +3366,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  		dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));  		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);  	} else { -		dev_info(adev->dev, "GPU reset(%d) successed!\n",atomic_read(&adev->gpu_reset_counter)); +		dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));  	} +	/*unlock kfd */ +	amdgpu_amdkfd_post_reset(adev);  	amdgpu_vf_error_trans_all(adev);  	adev->in_gpu_reset = 0;  	mutex_unlock(&adev->lock_reset); @@ -3293,8 +3388,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,   */  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  { -	u32 mask; -	int ret; +	struct pci_dev *pdev; +	enum pci_bus_speed speed_cap; +	enum pcie_link_width link_width;  	if (amdgpu_pcie_gen_cap)  		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -3312,27 +3408,61 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  	}  	if (adev->pm.pcie_gen_mask == 0) { -		ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask); -		if (!ret) { -			adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | +		/* asic caps */ +		pdev = adev->pdev; +		speed_cap = pcie_get_speed_cap(pdev); +		if (speed_cap == PCI_SPEED_UNKNOWN) { +			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |  						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |  						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); - -			if (mask & DRM_PCIE_SPEED_25) -				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; -			if (mask & DRM_PCIE_SPEED_50) -				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2; -			if (mask & DRM_PCIE_SPEED_80) -				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;  		} else { -			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; +			if (speed_cap == PCIE_SPEED_16_0GT) +				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | +							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | +							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | +							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); +			else if (speed_cap == PCIE_SPEED_8_0GT) +				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | +							  
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | +							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); +			else if (speed_cap == PCIE_SPEED_5_0GT) +				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | +							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); +			else +				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; +		} +		/* platform caps */ +		pdev = adev->ddev->pdev->bus->self; +		speed_cap = pcie_get_speed_cap(pdev); +		if (speed_cap == PCI_SPEED_UNKNOWN) { +			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | +						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); +		} else { +			if (speed_cap == PCIE_SPEED_16_0GT) +				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | +							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | +							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | +							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); +			else if (speed_cap == PCIE_SPEED_8_0GT) +				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | +							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | +							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); +			else if (speed_cap == PCIE_SPEED_5_0GT) +				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | +							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); +			else +				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; +  		}  	}  	if (adev->pm.pcie_mlw_mask == 0) { -		ret = drm_pcie_get_max_link_width(adev->ddev, &mask); -		if (!ret) { -			switch (mask) { -			case 32: +		pdev = adev->ddev->pdev->bus->self; +		link_width = pcie_get_width_cap(pdev); +		if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { +			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; +		} else { +			switch (link_width) { +			case PCIE_LNK_X32:  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | @@ -3341,7 +3471,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);  				break; -			case 16: +			case PCIE_LNK_X16:  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | @@ -3349,36 +3479,34 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);  				break; -			case 12: +			case PCIE_LNK_X12:  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);  				break; -			case 8: +			case PCIE_LNK_X8:  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);  				break; -			case 4: +			case PCIE_LNK_X4:  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);  				break; -			case 2: +			case PCIE_LNK_X2:  				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |  							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);  				break; -			case 1: +			case PCIE_LNK_X1:  				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;  				break;  			default:  				break;  			} -		} else { -			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;  		}  	}  } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 76ee8e04ff11..6748cd7fc129 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -157,7 +157,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,  	struct amdgpu_bo *new_abo;  	unsigned long flags;  	u64 tiling_flags; -	u64 base;  	int i, r;  	work = kzalloc(sizeof *work, GFP_KERNEL); @@ -189,12 +188,18 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,  		goto cleanup;  	} -	r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base); +	r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));  	if (unlikely(r != 0)) {  		DRM_ERROR("failed to pin new abo buffer before flip\n");  		goto unreserve;  	} +	r = amdgpu_ttm_alloc_gart(&new_abo->tbo); +	if (unlikely(r != 0)) { +		DRM_ERROR("%p bind failed\n", new_abo); +		goto unpin; +	} +  	r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,  					      &work->shared_count,  					      &work->shared); @@ -206,7 +211,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,  	amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);  	amdgpu_bo_unreserve(new_abo); -	work->base = base; +	work->base = amdgpu_bo_gpu_offset(new_abo);  	work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +  		amdgpu_get_vblank_counter_kms(dev, work->crtc_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 77ad59ade85c..1c4595562f8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -28,6 +28,7 @@  #include "amdgpu_i2c.h"  #include "amdgpu_dpm.h"  #include "atom.h" +#include "amd_pcie.h"  void amdgpu_dpm_print_class_info(u32 class, u32 class2)  { @@ -936,9 +937,11 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,  	case AMDGPU_PCIE_GEN3:  		return AMDGPU_PCIE_GEN3;  	default: -		if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3)) +		if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) && +		    (default_gen == AMDGPU_PCIE_GEN3))  			return AMDGPU_PCIE_GEN3; -		else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2)) +		else if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) && +			 (default_gen == AMDGPU_PCIE_GEN2))  			return AMDGPU_PCIE_GEN2;  		else  			return AMDGPU_PCIE_GEN1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index dd6203a0a6b7..ff24e1cc5b65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -287,12 +287,6 @@ enum amdgpu_pcie_gen {  #define amdgpu_dpm_force_performance_level(adev, l) \  		((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l))) -#define amdgpu_dpm_powergate_uvd(adev, g) \ -		((adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g))) - -#define amdgpu_dpm_powergate_vce(adev, g) \ -		((adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g))) -  #define amdgpu_dpm_get_current_power_state(adev) \  		((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)) @@ -347,6 +341,10 @@ enum amdgpu_pcie_gen {  		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\  			(adev)->powerplay.pp_handle, msg_id)) +#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \ +		((adev)->powerplay.pp_funcs->set_powergating_by_smu(\ +			
(adev)->powerplay.pp_handle, block_type, gate)) +  #define amdgpu_dpm_get_power_profile_mode(adev, buf) \  		((adev)->powerplay.pp_funcs->get_power_profile_mode(\  			(adev)->powerplay.pp_handle, buf)) @@ -359,10 +357,6 @@ enum amdgpu_pcie_gen {  		((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\  			(adev)->powerplay.pp_handle, type, parameter, size)) -#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \ -		((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \ -		(adev)->powerplay.pp_handle)) -  struct amdgpu_dpm {  	struct amdgpu_ps        *ps;  	/* number of valid power states */ @@ -402,7 +396,6 @@ struct amdgpu_dpm {  	u32 tdp_adjustment;  	u16 load_line_slope;  	bool power_control; -	bool ac_power;  	/* special states active */  	bool                    thermal_active;  	bool                    uvd_active; @@ -439,6 +432,7 @@ struct amdgpu_pm {  	struct amd_pp_display_configuration pm_display_cfg;/* set by dc */  	uint32_t                smu_prv_buffer_size;  	struct amdgpu_bo        *smu_prv_buffer; +	bool ac_power;  };  #define R600_SSTU_DFLT                               0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b0bf2f24da48..8843a06360fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1,10 +1,3 @@ -/** - * \file amdgpu_drv.c - * AMD Amdgpu driver - * - * \author Gareth Hughes <[email protected]> - */ -  /*   * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.   * All Rights Reserved. @@ -76,9 +69,10 @@   * - 3.24.0 - Add high priority compute support for gfx9   * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).   * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.   */  #define KMS_DRIVER_MAJOR	3 -#define KMS_DRIVER_MINOR	26 +#define KMS_DRIVER_MINOR	27  #define KMS_DRIVER_PATCHLEVEL	0  int amdgpu_vram_limit = 0; @@ -110,11 +104,8 @@ int amdgpu_vram_page_split = 512;  int amdgpu_vm_update_mode = -1;  int amdgpu_exp_hw_support = 0;  int amdgpu_dc = -1; -int amdgpu_dc_log = 0;  int amdgpu_sched_jobs = 32;  int amdgpu_sched_hw_submission = 2; -int amdgpu_no_evict = 0; -int amdgpu_direct_gma_size = 0;  uint amdgpu_pcie_gen_cap = 0;  uint amdgpu_pcie_lane_cap = 0;  uint amdgpu_cg_mask = 0xffffffff; @@ -122,7 +113,8 @@ uint amdgpu_pg_mask = 0xffffffff;  uint amdgpu_sdma_phase_quantum = 32;  char *amdgpu_disable_cu = NULL;  char *amdgpu_virtual_display = NULL; -uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */ +/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/ +uint amdgpu_pp_feature_mask = 0xfffd3fff;  int amdgpu_ngg = 0;  int amdgpu_prim_buf_per_se = 0;  int amdgpu_pos_buf_per_se = 0; @@ -135,163 +127,368 @@ int amdgpu_gpu_recovery = -1; /* auto */  int amdgpu_emu_mode = 0;  uint amdgpu_smu_memory_pool_size = 0; +/** + * DOC: vramlimit (int) + * Restrict the total amount of VRAM in MiB for testing.  The default is 0 (Use full VRAM). + */  MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");  module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); +/** + * DOC: vis_vramlimit (int) + * Restrict the amount of CPU visible VRAM in MiB for testing.  The default is 0 (Use full CPU visible VRAM). 
+ */  MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");  module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444); +/** + * DOC: gartsize (uint) + * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic). + */  MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");  module_param_named(gartsize, amdgpu_gart_size, uint, 0600); +/** + * DOC: gttsize (int) + * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM, + * otherwise 3/4 RAM size). + */  MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");  module_param_named(gttsize, amdgpu_gtt_size, int, 0600); +/** + * DOC: moverate (int) + * Set maximum buffer migration rate in MB/s. The default is -1 (8 MB/s). + */  MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");  module_param_named(moverate, amdgpu_moverate, int, 0600); +/** + * DOC: benchmark (int) + * Run benchmarks. The default is 0 (Skip benchmarks). + */  MODULE_PARM_DESC(benchmark, "Run benchmark");  module_param_named(benchmark, amdgpu_benchmarking, int, 0444); +/** + * DOC: test (int) + * Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (Skip test, only set 1 to run test). + */  MODULE_PARM_DESC(test, "Run tests");  module_param_named(test, amdgpu_testing, int, 0444); +/** + * DOC: audio (int) + * Set HDMI/DPAudio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disabled it. + */  MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");  module_param_named(audio, amdgpu_audio, int, 0444); +/** + * DOC: disp_priority (int) + * Set display Priority (1 = normal, 2 = high). Only affects non-DC display handling. The default is 0 (auto). + */  MODULE_PARM_DESC(disp_priority, "Display Priority (0 = auto, 1 = normal, 2 = high)");  module_param_named(disp_priority, amdgpu_disp_priority, int, 0444); +/** + * DOC: hw_i2c (int) + * To enable hw i2c engine. Only affects non-DC display handling. The default is 0 (Disabled). + */  MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)");  module_param_named(hw_i2c, amdgpu_hw_i2c, int, 0444); +/** + * DOC: pcie_gen2 (int) + * To disable PCIE Gen2/3 mode (0 = disable, 1 = enable). The default is -1 (auto, enabled). + */  MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)");  module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444); +/** + * DOC: msi (int) + * To disable Message Signaled Interrupts (MSI) functionality (1 = enable, 0 = disable). The default is -1 (auto, enabled). + */  MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");  module_param_named(msi, amdgpu_msi, int, 0444); +/** + * DOC: lockup_timeout (int) + * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000. + * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000. + */  MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");  module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444); +/** + * DOC: dpm (int) + * Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto). 
+ */  MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");  module_param_named(dpm, amdgpu_dpm, int, 0444); +/** + * DOC: fw_load_type (int) + * Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto). + */  MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");  module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444); +/** + * DOC: aspm (int) + * To disable ASPM (1 = enable, 0 = disable). The default is -1 (auto, enabled). + */  MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");  module_param_named(aspm, amdgpu_aspm, int, 0444); +/** + * DOC: runpm (int) + * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down + * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. + */  MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");  module_param_named(runpm, amdgpu_runtime_pm, int, 0444); +/** + * DOC: ip_block_mask (uint) + * Override what IP blocks are enabled on the GPU. Each GPU is a collection of IP blocks (gfx, display, video, etc.). + * Use this parameter to disable specific blocks. Note that the IP blocks do not have a fixed index. Some asics may not have + * some IPs or may include multiple instances of an IP so the ordering varies from asic to asic. See the driver output in + * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device). + */  MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");  module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); +/** + * DOC: bapm (int) + * Bidirectional Application Power Management (BAPM) used to dynamically share TDP between CPU and GPU. Set value 0 to disable it. + * The default is -1 (auto, enabled). + */  MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)");  module_param_named(bapm, amdgpu_bapm, int, 0444); +/** + * DOC: deep_color (int) + * Set 1 to enable Deep Color support. Only affects non-DC display handling. The default is 0 (disabled). + */  MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))");  module_param_named(deep_color, amdgpu_deep_color, int, 0444); +/** + * DOC: vm_size (int) + * Override the size of the GPU's per client virtual address space in GiB.  The default is -1 (automatic for each asic). + */  MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");  module_param_named(vm_size, amdgpu_vm_size, int, 0444); +/** + * DOC: vm_fragment_size (int) + * Override VM fragment size in bits (4, 5, etc. 4 = 64K, 9 = 2M). The default is -1 (automatic for each asic). + */  MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");  module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444); +/** + * DOC: vm_block_size (int) + * Override VM page table size in bits (default depending on vm_size and hw setup). The default is -1 (automatic for each asic). + */  MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");  module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444); +/** + * DOC: vm_fault_stop (int) + * Stop on VM fault for debugging (0 = never, 1 = print first, 2 = always). The default is 0 (No stop). 
+ */  MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");  module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); +/** + * DOC: vm_debug (int) + * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled). + */  MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");  module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); +/** + * DOC: vm_update_mode (int) + * Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default + * is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never). + */  MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");  module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); +/** + * DOC: vram_page_split (int) + * Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512. + */  MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");  module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); +/** + * DOC: exp_hw_support (int) + * Enable experimental hw support (1 = enable). The default is 0 (disabled). + */  MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");  module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); +/** + * DOC: dc (int) + * Disable/Enable Display Core driver for debugging (1 = enable, 0 = disable). The default is -1 (automatic for each asic). + */  MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))");  module_param_named(dc, amdgpu_dc, int, 0444); -MODULE_PARM_DESC(dc_log, "Display Core Log Level (0 = minimal (default), 1 = chatty"); -module_param_named(dc_log, amdgpu_dc_log, int, 0444); - +/** + * DOC: sched_jobs (int) + * Override the max number of jobs supported in the sw queue. The default is 32. + */  MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");  module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); +/** + * DOC: sched_hw_submission (int) + * Override the max number of HW submissions. The default is 2. + */  MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");  module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); +/** + * DOC: ppfeaturemask (uint) + * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h. + * The default is the current set of stable power features. + */  MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");  module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); -MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))"); -module_param_named(no_evict, amdgpu_no_evict, int, 0444); - -MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)"); -module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444); - +/** + * DOC: pcie_gen_cap (uint) + * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. + * The default is 0 (automatic for each asic). 
+ */  MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");  module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444); +/** + * DOC: pcie_lane_cap (uint) + * Override PCIE lanes capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. + * The default is 0 (automatic for each asic). + */  MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");  module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); +/** + * DOC: cg_mask (uint) + * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled). + */  MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");  module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); +/** + * DOC: pg_mask (uint) + * Override Powergating features enabled on GPU (0 = disable power gating). See the AMD_PG_SUPPORT flags in + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled). + */  MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");  module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444); +/** + * DOC: sdma_phase_quantum (uint) + * Override SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change). The default is 32. + */  MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");  module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444); +/** + * DOC: disable_cu (charp) + * Set to disable CUs (It's set like se.sh.cu,...). The default is NULL. + */  MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");  module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444); +/** + * DOC: virtual_display (charp) + * Set to enable virtual display feature. This feature provides a virtual display hardware on headless boards + * or in virtualized environments. It will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x. It's the pci address of + * the device, plus the number of crtcs to expose. E.g., 0000:26:00.0,4 would enable 4 virtual crtcs on the pci + * device at 26:00.0. The default is NULL. + */  MODULE_PARM_DESC(virtual_display,  		 "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");  module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); +/** + * DOC: ngg (int) + * Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled). + */  MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");  module_param_named(ngg, amdgpu_ngg, int, 0444); +/** + * DOC: prim_buf_per_se (int) + * Override the size of Primitive Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). + */  MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");  module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444); +/** + * DOC: pos_buf_per_se (int) + * Override the size of Position Buffer per Shader Engine in Byte. The default is 0 (depending on gfx). + */  MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");  module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444); +/** + * DOC: cntl_sb_buf_per_se (int) + * Override the size of Control Sideband per Shader Engine in Byte. The default is 0 (depending on gfx). 
+ */  MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");  module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); +/** + * DOC: param_buf_per_se (int) + * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte. The default is 0 (depending on gfx). + */  MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");  module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); +/** + * DOC: job_hang_limit (int) + * Set how much time to allow a job to hang before it is dropped. The default is 0. + */  MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)");  module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); +/** + * DOC: lbpw (int) + * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled). + */  MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");  module_param_named(lbpw, amdgpu_lbpw, int, 0444);  MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");  module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); +/** + * DOC: gpu_recovery (int) + * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV). + */  MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");  module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); +/** + * DOC: emu_mode (int) + * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled). + */  MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");  module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); +/** + * DOC: si_support (int) + * Set SI support driver. This parameter only takes effect when CONFIG_DRM_AMDGPU_SI is set. For SI asic, when radeon driver is enabled, + * set value 0 to use the radeon driver, while set value 1 to use the amdgpu driver. The default is to use the radeon driver when it is + * available, otherwise the amdgpu driver. + */  #ifdef CONFIG_DRM_AMDGPU_SI  #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -305,6 +502,12 @@ MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)")  module_param_named(si_support, amdgpu_si_support, int, 0444);  #endif +/** + * DOC: cik_support (int) + * Set CIK support driver. This parameter only takes effect when CONFIG_DRM_AMDGPU_CIK is set. For CIK asic, when radeon driver is enabled, + * set value 0 to use the radeon driver, while set value 1 to use the amdgpu driver. The default is to use the radeon driver when it is + * available, otherwise the amdgpu driver. + */  #ifdef CONFIG_DRM_AMDGPU_CIK  #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -318,6 +521,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)  module_param_named(cik_support, amdgpu_cik_support, int, 0444);  #endif +/** + * DOC: smu_memory_pool_size (uint) + * It is used to reserve GTT for SMU debug usage; set value 0 to disable it. The actual size is value * 256MiB. + * E.g. 0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte. The default is 0 (disabled). 
+ */  MODULE_PARM_DESC(smu_memory_pool_size,  	"reserve gtt for smu debug usage, 0 = disable,"  		"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); @@ -664,7 +872,7 @@ retry_init:  err_pci:  	pci_disable_device(pdev);  err_free: -	drm_dev_unref(dev); +	drm_dev_put(dev);  	return ret;  } @@ -674,7 +882,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)  	struct drm_device *dev = pci_get_drvdata(pdev);  	drm_dev_unregister(dev); -	drm_dev_unref(dev); +	drm_dev_put(dev);  	pci_disable_device(pdev);  	pci_set_drvdata(pdev, NULL);  } @@ -855,9 +1063,21 @@ static const struct dev_pm_ops amdgpu_pm_ops = {  	.runtime_idle = amdgpu_pmops_runtime_idle,  }; +static int amdgpu_flush(struct file *f, fl_owner_t id) +{ +	struct drm_file *file_priv = f->private_data; +	struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + +	amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr); + +	return 0; +} + +  static const struct file_operations amdgpu_driver_kms_fops = {  	.owner = THIS_MODULE,  	.open = drm_open, +	.flush = amdgpu_flush,  	.release = drm_release,  	.unlocked_ioctl = amdgpu_drm_ioctl,  	.mmap = amdgpu_mmap, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index 94138abe093b..ae8fac34f7a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -46,7 +46,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev)  		list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {  			amdgpu_encoder = to_amdgpu_encoder(encoder);  			if (amdgpu_encoder->devices & amdgpu_connector->devices) { -				drm_mode_connector_attach_encoder(connector, encoder); +				drm_connector_attach_encoder(connector, encoder);  				if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {  					amdgpu_atombios_encoder_init_backlight(amdgpu_encoder, connector);  					adev->mode_info.bl_encoder = amdgpu_encoder; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index bc5fd8ebab5d..69c5d22f29bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,  				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |  				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |  				       AMDGPU_GEM_CREATE_VRAM_CLEARED, -				       true, NULL, &gobj); +				       ttm_bo_type_kernel, NULL, &gobj);  	if (ret) {  		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);  		return -ENOMEM; @@ -168,11 +168,19 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,  	} -	ret = amdgpu_bo_pin(abo, domain, NULL); +	ret = amdgpu_bo_pin(abo, domain);  	if (ret) {  		amdgpu_bo_unreserve(abo);  		goto out_unref;  	} + +	ret = amdgpu_ttm_alloc_gart(&abo->tbo); +	if (ret) { +		amdgpu_bo_unreserve(abo); +		dev_err(adev->dev, "%p bind failed\n", abo); +		goto out_unref; +	} +  	ret = amdgpu_bo_kmap(abo, NULL);  	amdgpu_bo_unreserve(abo);  	if (ret) { @@ -365,8 +373,8 @@ void amdgpu_fbdev_fini(struct amdgpu_device *adev)  void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state)  {  	if (adev->mode_info.rfbdev) -		drm_fb_helper_set_suspend(&adev->mode_info.rfbdev->helper, -			state); +		drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper, +						   state);  }  int amdgpu_fbdev_total_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 
e74d620d9699..7056925eb386 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -646,7 +646,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {  	.get_driver_name = amdgpu_fence_get_driver_name,  	.get_timeline_name = amdgpu_fence_get_timeline_name,  	.enable_signaling = amdgpu_fence_enable_signaling, -	.wait = dma_fence_default_wait,  	.release = amdgpu_fence_release,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index dd11b7313ca0..a54d5655a191 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -143,14 +143,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)   */  int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)  { -	uint64_t gpu_addr;  	int r;  	r = amdgpu_bo_reserve(adev->gart.robj, false);  	if (unlikely(r != 0))  		return r; -	r = amdgpu_bo_pin(adev->gart.robj, -				AMDGPU_GEM_DOMAIN_VRAM, &gpu_addr); +	r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM);  	if (r) {  		amdgpu_bo_unreserve(adev->gart.robj);  		return r; @@ -159,7 +157,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)  	if (r)  		amdgpu_bo_unpin(adev->gart.robj);  	amdgpu_bo_unreserve(adev->gart.robj); -	adev->gart.table_addr = gpu_addr; +	adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);  	return r;  } @@ -234,7 +232,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,  	}  	t = offset / AMDGPU_GPU_PAGE_SIZE; -	p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); +	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;  	for (i = 0; i < pages; i++, p++) {  #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS  		adev->gart.pages[p] = NULL; @@ -243,7 +241,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,  		if (!adev->gart.ptr)  			continue; -		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { +		for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {  			amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,  					       t, page_base, flags);  			page_base += AMDGPU_GPU_PAGE_SIZE; @@ -282,7 +280,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,  	for (i = 0; i < pages; i++) {  		page_base = dma_addr[i]; -		for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { +		for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {  			amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);  			page_base += AMDGPU_GPU_PAGE_SIZE;  		} @@ -319,7 +317,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,  #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS  	t = offset / AMDGPU_GPU_PAGE_SIZE; -	p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); +	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;  	for (i = 0; i < pages; i++, p++)  		adev->gart.pages[p] = pagelist ? 
pagelist[i] : NULL;  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 456295c00291..9f9e9dc87da1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -37,6 +37,8 @@ struct amdgpu_bo;  #define AMDGPU_GPU_PAGE_SHIFT 12  #define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK) +#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE) +  struct amdgpu_gart {  	u64				table_addr;  	struct amdgpu_bo		*robj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5fb156a01774..71792d820ae0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -265,7 +265,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,  	r = amdgpu_gem_object_create(adev, size, args->in.alignment,  				     (u32)(0xffffffff & args->in.domains), -				     flags, false, resv, &gobj); +				     flags, ttm_bo_type_device, resv, &gobj);  	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {  		if (!r) {  			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); @@ -317,7 +317,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,  	/* create a gem object to contain this object in */  	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU, -				     0, 0, NULL, &gobj); +				     0, ttm_bo_type_device, NULL, &gobj);  	if (r)  		return r; @@ -344,7 +344,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,  		if (r)  			goto free_pages; -		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); +		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);  		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  		amdgpu_bo_unreserve(bo);  		if (r) @@ -510,7 +510,6 @@ out:   * @adev: amdgpu_device pointer   * @vm: vm to update   * @bo_va: bo_va to update - * @list: validation list   * @operation: map, unmap or clear   *   * Update the bo_va directly after setting its address. 
Errors are not @@ -519,7 +518,6 @@ out:  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,  				    struct amdgpu_vm *vm,  				    struct amdgpu_bo_va *bo_va, -				    struct list_head *list,  				    uint32_t operation)  {  	int r; @@ -612,7 +610,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,  			return -ENOENT;  		abo = gem_to_amdgpu_bo(gobj);  		tv.bo = &abo->tbo; -		tv.shared = false; +		tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);  		list_add(&tv.head, &list);  	} else {  		gobj = NULL; @@ -673,7 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,  		break;  	}  	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) -		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list, +		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,  					args->operation);  error_backoff: @@ -768,7 +766,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,  				amdgpu_display_supported_domains(adev));  	r = amdgpu_gem_object_create(adev, args->size, 0, domain,  				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, -				     false, NULL, &gobj); +				     ttm_bo_type_device, NULL, &gobj);  	if (r)  		return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 893c2490b783..bb5a47a45790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -105,8 +105,25 @@ struct amdgpu_gmc {  	/* protects concurrent invalidation */  	spinlock_t		invalidate_lock;  	bool			translate_further; +	struct kfd_vm_fault_info *vm_fault_info; +	atomic_t		vm_fault_info_updated;  	const struct amdgpu_gmc_funcs	*gmc_funcs;  }; +/** + * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR + * + * @adev: amdgpu_device pointer + * + * Returns: + * True if full VRAM is visible through the BAR + */ +static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc) +{ +	WARN_ON(gmc->real_vram_size < gmc->visible_vram_size); + +	return (gmc->real_vram_size == gmc->visible_vram_size); +} +  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 7aaa263ad8c7..5518e623fed2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -139,7 +139,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,  	/* ring tests don't use a job */  	if (job) {  		vm = job->vm; -		fence_ctx = job->fence_ctx; +		fence_ctx = job->base.s_fence->scheduled.context;  	} else {  		vm = NULL;  		fence_ctx = 0; @@ -353,7 +353,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)  			ring->funcs->type == AMDGPU_RING_TYPE_VCE ||  			ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||  			ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC || -			ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) +			ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || +			ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)  			tmo = tmo_mm;  		else  			tmo = tmo_gfx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index a1c78f90eadf..3a072a7a39f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -578,11 +578,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)  			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);  		}  	} - -	adev->vm_manager.fence_context = -		dma_fence_context_alloc(AMDGPU_MAX_RINGS); -	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) -		adev->vm_manager.seqno[i] = 0;  
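The gart changes above replace the open-coded (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE) ratio with the new AMDGPU_GPU_PAGES_IN_CPU_PAGE macro. A small stand-alone sketch of the index math, assuming a 4 KiB CPU page and the 4 KiB GPU page implied by AMDGPU_GPU_PAGE_SHIFT == 12:

#include <stdint.h>
#include <stdio.h>

#define CPU_PAGE_SIZE		4096u	/* assumption: typical x86 PAGE_SIZE */
#define GPU_PAGE_SIZE		4096u	/* 1 << AMDGPU_GPU_PAGE_SHIFT */
#define GPU_PAGES_IN_CPU_PAGE	(CPU_PAGE_SIZE / GPU_PAGE_SIZE)

int main(void)
{
	uint64_t offset = 3 * GPU_PAGE_SIZE;	/* byte offset into the GART */
	uint64_t t = offset / GPU_PAGE_SIZE;	/* GPU page index */
	uint64_t p = t / GPU_PAGES_IN_CPU_PAGE;	/* index of the backing CPU page */

	/* With 64 KiB CPU pages (some arm64/ppc configs) the ratio would be 16. */
	printf("offset %llu -> gpu page %llu, cpu page %llu\n",
	       (unsigned long long)offset, (unsigned long long)t,
	       (unsigned long long)p);
	return 0;
}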
}  /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 3a5ca462abf0..1abf5b5bac9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -25,6 +25,23 @@   *          Alex Deucher   *          Jerome Glisse   */ + +/** + * DOC: Interrupt Handling + * + * Interrupts generated within GPU hardware raise interrupt requests that are + * passed to amdgpu IRQ handler which is responsible for detecting source and + * type of the interrupt and dispatching matching handlers. If handling an + * interrupt requires calling kernel functions that may sleep processing is + * dispatched to work handlers. + * + * If MSI functionality is not disabled by module parameter then MSI + * support will be enabled. + * + * For GPU interrupt sources that may be driven by another driver, IRQ domain + * support is used (with mapping between virtual and hardware IRQs). + */ +  #include <linux/irq.h>  #include <drm/drmP.h>  #include <drm/drm_crtc_helper.h> @@ -43,19 +60,21 @@  #define AMDGPU_WAIT_IDLE_TIMEOUT 200 -/* - * Handle hotplug events outside the interrupt handler proper. - */  /** - * amdgpu_hotplug_work_func - display hotplug work handler + * amdgpu_hotplug_work_func - work handler for display hotplug event   * - * @work: work struct + * @work: work struct pointer   * - * This is the hot plug event work handler (all asics). - * The work gets scheduled from the irq handler if there - * was a hot plug interrupt.  It walks the connector table - * and calls the hotplug handler for each one, then sends - * a drm hotplug event to alert userspace. + * This is the hotplug event work handler (all ASICs). + * The work gets scheduled from the IRQ handler if there + * was a hotplug interrupt.  It walks through the connector table + * and calls hotplug handler for each connector. After this, it sends + * a DRM hotplug event to alert userspace. + * + * This design approach is required in order to defer hotplug event handling + * from the IRQ handler to a work handler because hotplug handler has to use + * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may + * sleep).   */  static void amdgpu_hotplug_work_func(struct work_struct *work)  { @@ -74,13 +93,12 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)  }  /** - * amdgpu_irq_reset_work_func - execute gpu reset + * amdgpu_irq_reset_work_func - execute GPU reset   * - * @work: work struct + * @work: work struct pointer   * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. + * Execute scheduled GPU reset (Cayman+). + * This function is called when the IRQ handler thinks we need a GPU reset.   */  static void amdgpu_irq_reset_work_func(struct work_struct *work)  { @@ -91,7 +109,13 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)  		amdgpu_device_gpu_recover(adev, NULL, false);  } -/* Disable *all* interrupts */ +/** + * amdgpu_irq_disable_all - disable *all* interrupts + * + * @adev: amdgpu device pointer + * + * Disable all types of interrupts from all sources. + */  void amdgpu_irq_disable_all(struct amdgpu_device *adev)  {  	unsigned long irqflags; @@ -123,11 +147,15 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)  }  /** - * amdgpu_irq_handler - irq handler + * amdgpu_irq_handler - IRQ handler + * + * @irq: IRQ number (unused) + * @arg: pointer to DRM device   * - * @int irq, void *arg: args + * IRQ handler for amdgpu driver (all ASICs).  
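The hotplug handling described above has to defer its work out of interrupt context because the handlers take mutexes that may sleep. A generic sketch of that IRQ-to-workqueue pattern, not amdgpu code (all names here are illustrative):

#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/workqueue.h>

static struct work_struct hotplug_work;		/* illustrative name */

static void hotplug_work_func(struct work_struct *work)
{
	/* Process context: taking mutexes and sleeping is allowed here. */
}

static irqreturn_t __maybe_unused example_irq_handler(int irq, void *arg)
{
	/* Interrupt context: no sleeping, so only schedule the heavy work. */
	schedule_work(&hotplug_work);
	return IRQ_HANDLED;
}

static int __init example_init(void)
{
	INIT_WORK(&hotplug_work, hotplug_work_func);
	/* A real driver would wire example_irq_handler up with request_irq(). */
	return 0;
}

static void __exit example_exit(void)
{
	cancel_work_sync(&hotplug_work);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");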
 * - * This is the irq handler for the amdgpu driver (all asics). + * Returns: + * result of handling the IRQ, as defined by &irqreturn_t   */  irqreturn_t amdgpu_irq_handler(int irq, void *arg)  { @@ -142,18 +170,18 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)  }  /** - * amdgpu_msi_ok - asic specific msi checks + * amdgpu_msi_ok - check whether MSI functionality is enabled   * - * @adev: amdgpu device pointer + * @adev: amdgpu device pointer (unused) + * + * Checks whether MSI functionality has been disabled via module parameter + * (all ASICs).   * - * Handles asic specific MSI checks to determine if - * MSIs should be enabled on a particular chip (all asics). - * Returns true if MSIs should be enabled, false if MSIs - * should not be enabled. + * Returns: + * *true* if MSIs are allowed to be enabled or *false* otherwise   */  static bool amdgpu_msi_ok(struct amdgpu_device *adev)  { -	/* force MSI on */  	if (amdgpu_msi == 1)  		return true;  	else if (amdgpu_msi == 0) @@ -163,12 +191,15 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)  }  /** - * amdgpu_irq_init - init driver interrupt info + * amdgpu_irq_init - initialize interrupt handling   *   * @adev: amdgpu device pointer   * - * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics). - * Returns 0 for success, error for failure. + * Sets up work functions for hotplug and reset interrupts, enables MSI + * functionality, initializes vblank, hotplug and reset interrupt handling. + * + * Returns: + * 0 on success or error code on failure   */  int amdgpu_irq_init(struct amdgpu_device *adev)  { @@ -176,7 +207,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)  	spin_lock_init(&adev->irq.lock); -	/* enable msi */ +	/* Enable MSI if not disabled by module parameter */  	adev->irq.msi_enabled = false;  	if (amdgpu_msi_ok(adev)) { @@ -189,7 +220,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)  	if (!amdgpu_device_has_dc_support(adev)) {  		if (!adev->enable_virtual_display) -			/* Disable vblank irqs aggressively for power-saving */ +			/* Disable vblank IRQs aggressively for power-saving */  			/* XXX: can this be enabled for DC? */  			adev->ddev->vblank_disable_immediate = true; @@ -197,7 +228,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)  		if (r)  			return r; -		/* pre DCE11 */ +		/* Pre-DCE11 */  		INIT_WORK(&adev->hotplug_work,  				amdgpu_hotplug_work_func);  	} @@ -220,11 +251,13 @@ int amdgpu_irq_init(struct amdgpu_device *adev)  }  /** - * amdgpu_irq_fini - tear down driver interrupt info + * amdgpu_irq_fini - shut down interrupt handling   *   * @adev: amdgpu device pointer   * - * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics). + * Tears down work functions for hotplug and reset interrupts, disables MSI + * functionality, shuts down vblank, hotplug and reset interrupt handling, + * turns off interrupts from all sources (all ASICs).   */  void amdgpu_irq_fini(struct amdgpu_device *adev)  { @@ -264,12 +297,17 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)  }  /** - * amdgpu_irq_add_id - register irq source + * amdgpu_irq_add_id - register IRQ source   *   * @adev: amdgpu device pointer - * @src_id: source id for this source - * @source: irq source + * @client_id: client id + * @src_id: source id + * @source: IRQ source pointer + * + * Registers IRQ source on a client.   
* + * Returns: + * 0 on success or error code otherwise   */  int amdgpu_irq_add_id(struct amdgpu_device *adev,  		      unsigned client_id, unsigned src_id,  		      struct amdgpu_irq_src *source) @@ -312,12 +350,12 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,  }  /** - * amdgpu_irq_dispatch - dispatch irq to IP blocks + * amdgpu_irq_dispatch - dispatch IRQ to IP blocks   *   * @adev: amdgpu device pointer - * @entry: interrupt vector + * @entry: interrupt vector pointer   * - * Dispatches the irq to the different IP blocks + * Dispatches IRQ to IP blocks.   */  void amdgpu_irq_dispatch(struct amdgpu_device *adev,  			 struct amdgpu_iv_entry *entry) @@ -361,13 +399,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,  }  /** - * amdgpu_irq_update - update hw interrupt state + * amdgpu_irq_update - update hardware interrupt state   *   * @adev: amdgpu device pointer - * @src: interrupt src you want to enable - * @type: type of interrupt you want to update + * @src: interrupt source pointer + * @type: type of interrupt   * - * Updates the interrupt state for a specific src (all asics). + * Updates interrupt state for the specific source (all ASICs).   */  int amdgpu_irq_update(struct amdgpu_device *adev,  			     struct amdgpu_irq_src *src, unsigned type) @@ -378,7 +416,7 @@ int amdgpu_irq_update(struct amdgpu_device *adev,  	spin_lock_irqsave(&adev->irq.lock, irqflags); -	/* we need to determine after taking the lock, otherwise +	/* We need to determine after taking the lock, otherwise  	   we might disable just enabled interrupts again */  	if (amdgpu_irq_enabled(adev, src, type))  		state = AMDGPU_IRQ_STATE_ENABLE; @@ -390,6 +428,14 @@ int amdgpu_irq_update(struct amdgpu_device *adev,  	return r;  } +/** + * amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources + * + * @adev: amdgpu device pointer + * + * Updates state of all types of interrupts on all sources on resume after + * reset. + */  void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)  {  	int i, j, k; @@ -413,10 +459,13 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)   * amdgpu_irq_get - enable interrupt   *   * @adev: amdgpu device pointer - * @src: interrupt src you want to enable - * @type: type of interrupt you want to enable + * @src: interrupt source pointer + * @type: type of interrupt   * - * Enables the interrupt type for a specific src (all asics). + * Enables specified type of interrupt on the specified source (all ASICs). + * + * Returns: + * 0 on success or error code otherwise   */  int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,  		   unsigned type) @@ -440,10 +489,13 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,   * amdgpu_irq_put - disable interrupt   *   * @adev: amdgpu device pointer - * @src: interrupt src you want to disable - * @type: type of interrupt you want to disable + * @src: interrupt source pointer + * @type: type of interrupt + * + * Disables specified type of interrupt on the specified source (all ASICs).   * - * Disables the interrupt type for a specific src (all asics). 
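amdgpu_irq_get() and amdgpu_irq_put() documented above reference-count each interrupt type per source and reprogram the hardware state through amdgpu_irq_update(). A small user-space model of that enable refcounting (illustrative only, using C11 atomics in place of the kernel's atomic_t):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int enabled_refcount;	/* one counter per interrupt type in the driver */

static void hw_set_enabled(bool on)	/* stand-in for programming the IH registers */
{
	printf("interrupt source %s\n", on ? "enabled" : "disabled");
}

static void irq_get(void)
{
	if (atomic_fetch_add(&enabled_refcount, 1) == 0)
		hw_set_enabled(true);	/* first user turns the source on */
}

static void irq_put(void)
{
	if (atomic_fetch_sub(&enabled_refcount, 1) == 1)
		hw_set_enabled(false);	/* last user turns it off again */
}

int main(void)
{
	irq_get();	/* enables the source */
	irq_get();	/* second user only bumps the count */
	irq_put();
	irq_put();	/* disables the source */
	return 0;
}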
+ * Returns: + * 0 on success or error code otherwise   */  int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,  		   unsigned type) @@ -464,12 +516,17 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,  }  /** - * amdgpu_irq_enabled - test if irq is enabled or not + * amdgpu_irq_enabled - check whether interrupt is enabled or not   *   * @adev: amdgpu device pointer - * @idx: interrupt src you want to test + * @src: interrupt source pointer + * @type: type of interrupt   * - * Tests if the given interrupt source is enabled or not + * Checks whether the given type of interrupt is enabled on the given source. + * + * Returns: + * *true* if interrupt is enabled, *false* if interrupt is disabled or on + * invalid parameters   */  bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,  			unsigned type) @@ -486,7 +543,7 @@ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,  	return !!atomic_read(&src->enabled_types[type]);  } -/* gen irq */ +/* XXX: Generic IRQ handling */  static void amdgpu_irq_mask(struct irq_data *irqd)  {  	/* XXX */ @@ -497,12 +554,26 @@ static void amdgpu_irq_unmask(struct irq_data *irqd)  	/* XXX */  } +/* amdgpu hardware interrupt chip descriptor */  static struct irq_chip amdgpu_irq_chip = {  	.name = "amdgpu-ih",  	.irq_mask = amdgpu_irq_mask,  	.irq_unmask = amdgpu_irq_unmask,  }; +/** + * amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers + * + * @d: amdgpu IRQ domain pointer (unused) + * @irq: virtual IRQ number + * @hwirq: hardware irq number + * + * Current implementation assigns simple interrupt handler to the given virtual + * IRQ. + * + * Returns: + * 0 on success or error code otherwise + */  static int amdgpu_irqdomain_map(struct irq_domain *d,  				unsigned int irq, irq_hw_number_t hwirq)  { @@ -514,17 +585,21 @@ static int amdgpu_irqdomain_map(struct irq_domain *d,  	return 0;  } +/* Implementation of methods for amdgpu IRQ domain */  static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {  	.map = amdgpu_irqdomain_map,  };  /** - * amdgpu_irq_add_domain - create a linear irq domain + * amdgpu_irq_add_domain - create a linear IRQ domain   *   * @adev: amdgpu device pointer   * - * Create an irq domain for GPU interrupt sources + * Creates an IRQ domain for GPU interrupt sources   * that may be driven by another driver (e.g., ACP). + * + * Returns: + * 0 on success or error code otherwise   */  int amdgpu_irq_add_domain(struct amdgpu_device *adev)  { @@ -539,11 +614,11 @@ int amdgpu_irq_add_domain(struct amdgpu_device *adev)  }  /** - * amdgpu_irq_remove_domain - remove the irq domain + * amdgpu_irq_remove_domain - remove the IRQ domain   *   * @adev: amdgpu device pointer   * - * Remove the irq domain for GPU interrupt sources + * Removes the IRQ domain for GPU interrupt sources   * that may be driven by another driver (e.g., ACP).   
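The IRQ domain described above lets a peer driver (e.g. ACP) receive a GPU interrupt as an ordinary Linux IRQ via amdgpu_irq_create_mapping(). A hedged sketch of how such a client could consume the returned IRQ; the client handler, the source id and the assumption that 0 means "no mapping" are hypothetical:

/* Hypothetical client of the amdgpu IRQ domain (e.g. an ACP-like driver). */
static int example_client_wire_irq(struct amdgpu_device *adev, unsigned int src_id,
				   irq_handler_t client_handler, void *client_data)
{
	unsigned int linux_irq = amdgpu_irq_create_mapping(adev, src_id);

	if (!linux_irq)		/* assumption: 0 means no mapping was created */
		return -ENODEV;

	/* From here on it behaves like a normal Linux interrupt line. */
	return request_irq(linux_irq, client_handler, 0, "example-client", client_data);
}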
*/  void amdgpu_irq_remove_domain(struct amdgpu_device *adev) @@ -555,16 +630,17 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)  }  /** - * amdgpu_irq_create_mapping - create a mapping between a domain irq and a - *                             Linux irq + * amdgpu_irq_create_mapping - create mapping between domain Linux IRQs   *   * @adev: amdgpu device pointer   * @src_id: IH source id   * - * Create a mapping between a domain irq (GPU IH src id) and a Linux irq + * Creates mapping between a domain IRQ (GPU IH src id) and a Linux IRQ   * Use this for components that generate a GPU interrupt, but are driven   * by a different driver (e.g., ACP). - * Returns the Linux irq. + * + * Returns: + * Linux IRQ   */  unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)  { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2bd56760c744..391e2f7c03aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -30,14 +30,14 @@  static void amdgpu_job_timedout(struct drm_sched_job *s_job)  { -	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); +	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); +	struct amdgpu_job *job = to_amdgpu_job(s_job); -	DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n", -		  job->base.sched->name, -		  atomic_read(&job->ring->fence_drv.last_seq), -		  job->ring->fence_drv.sync_seq); +	DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", +		  job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), +		  ring->fence_drv.sync_seq); -	amdgpu_device_gpu_recover(job->adev, job, false); +	amdgpu_device_gpu_recover(ring->adev, job, false);  }  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -54,7 +54,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,  	if (!*job)  		return -ENOMEM; -	(*job)->adev = adev; +	/* +	 * Initialize the scheduler to at least some ring so that we always +	 * have a pointer to adev. +	 */ +	(*job)->base.sched = &adev->rings[0]->sched;  	(*job)->vm = vm;  	(*job)->ibs = (void *)&(*job)[1];  	(*job)->num_ibs = num_ibs; @@ -86,6 +90,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,  void amdgpu_job_free_resources(struct amdgpu_job *job)  { +	struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);  	struct dma_fence *f;  	unsigned i; @@ -93,14 +98,15 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)  	f = job->base.s_fence ? 
&job->base.s_fence->finished : job->fence;  	for (i = 0; i < job->num_ibs; ++i) -		amdgpu_ib_free(job->adev, &job->ibs[i], f); +		amdgpu_ib_free(ring->adev, &job->ibs[i], f);  }  static void amdgpu_job_free_cb(struct drm_sched_job *s_job)  { -	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); +	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); +	struct amdgpu_job *job = to_amdgpu_job(s_job); -	amdgpu_ring_priority_put(job->ring, s_job->s_priority); +	amdgpu_ring_priority_put(ring, s_job->s_priority);  	dma_fence_put(job->fence);  	amdgpu_sync_free(&job->sync);  	amdgpu_sync_free(&job->sched_sync); @@ -117,50 +123,68 @@ void amdgpu_job_free(struct amdgpu_job *job)  	kfree(job);  } -int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, -		      struct drm_sched_entity *entity, void *owner, -		      struct dma_fence **f) +int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, +		      void *owner, struct dma_fence **f)  { +	enum drm_sched_priority priority; +	struct amdgpu_ring *ring;  	int r; -	job->ring = ring;  	if (!f)  		return -EINVAL; -	r = drm_sched_job_init(&job->base, &ring->sched, entity, owner); +	r = drm_sched_job_init(&job->base, entity, owner);  	if (r)  		return r;  	job->owner = owner; -	job->fence_ctx = entity->fence_context;  	*f = dma_fence_get(&job->base.s_fence->finished);  	amdgpu_job_free_resources(job); -	amdgpu_ring_priority_get(job->ring, job->base.s_priority); +	priority = job->base.s_priority;  	drm_sched_entity_push_job(&job->base, entity); +	ring = to_amdgpu_ring(entity->rq->sched); +	amdgpu_ring_priority_get(ring, priority); + +	return 0; +} + +int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, +			     struct dma_fence **fence) +{ +	int r; + +	job->base.sched = &ring->sched; +	r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); +	job->fence = dma_fence_get(*fence); +	if (r) +		return r; + +	amdgpu_job_free(job);  	return 0;  }  static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,  					       struct drm_sched_entity *s_entity)  { +	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);  	struct amdgpu_job *job = to_amdgpu_job(sched_job);  	struct amdgpu_vm *vm = job->vm; +	struct dma_fence *fence;  	bool explicit = false;  	int r; -	struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit); +	fence = amdgpu_sync_get_fence(&job->sync, &explicit);  	if (fence && explicit) {  		if (drm_sched_dependency_optimized(fence, s_entity)) { -			r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false); +			r = amdgpu_sync_fence(ring->adev, &job->sched_sync, +					      fence, false);  			if (r) -				DRM_ERROR("Error adding fence to sync (%d)\n", r); +				DRM_ERROR("Error adding fence (%d)\n", r);  		}  	}  	while (fence == NULL && vm && !job->vmid) { -		struct amdgpu_ring *ring = job->ring; -  		r = amdgpu_vmid_grab(vm, ring, &job->sync,  				     &job->base.s_fence->finished,  				     job); @@ -175,30 +199,25 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,  static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)  { +	struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);  	struct dma_fence *fence = NULL, *finished; -	struct amdgpu_device *adev;  	struct amdgpu_job *job;  	int r; -	if (!sched_job) { -		DRM_ERROR("job is null\n"); -		return NULL; -	}  	job = to_amdgpu_job(sched_job);  	finished = &job->base.s_fence->finished; -	adev = job->adev;  	
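With job->ring and job->adev removed, the scheduler callbacks above recover the ring with to_amdgpu_ring(sched) and the job with to_amdgpu_job(), both container_of() wrappers. A stand-alone illustration of that pointer recovery with generic stand-in types (not the driver's structs):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct sched_job { int id; };				/* stand-in for drm_sched_job */
struct gpu_job   { struct sched_job base; const char *name; };

#define to_gpu_job(sj) container_of((sj), struct gpu_job, base)

int main(void)
{
	struct gpu_job job = { .base = { .id = 42 }, .name = "example" };
	struct sched_job *base = &job.base;	/* what the scheduler hands back */

	printf("recovered job '%s' (id %d)\n",
	       to_gpu_job(base)->name, to_gpu_job(base)->base.id);
	return 0;
}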
BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));  	trace_amdgpu_sched_run_job(job); -	if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) +	if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))  		dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */  	if (finished->error < 0) {  		DRM_INFO("Skip scheduling IBs!\n");  	} else { -		r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, +		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,  				       &fence);  		if (r)  			DRM_ERROR("Error scheduling IBs (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h new file mode 100644 index 000000000000..57cfe78a262b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -0,0 +1,74 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_JOB_H__ +#define __AMDGPU_JOB_H__ + +/* bit set means command submit involves a preamble IB */ +#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) +/* bit set means preamble IB is first presented in belonging context */ +#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) +/* bit set means context switch occured */ +#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) + +#define to_amdgpu_job(sched_job)		\ +		container_of((sched_job), struct amdgpu_job, base) + +struct amdgpu_fence; + +struct amdgpu_job { +	struct drm_sched_job    base; +	struct amdgpu_vm	*vm; +	struct amdgpu_sync	sync; +	struct amdgpu_sync	sched_sync; +	struct amdgpu_ib	*ibs; +	struct dma_fence	*fence; /* the hw fence */ +	uint32_t		preamble_status; +	uint32_t		num_ibs; +	void			*owner; +	bool                    vm_needs_flush; +	uint64_t		vm_pd_addr; +	unsigned		vmid; +	unsigned		pasid; +	uint32_t		gds_base, gds_size; +	uint32_t		gws_base, gws_size; +	uint32_t		oa_base, oa_size; +	uint32_t		vram_lost_counter; + +	/* user fence handling */ +	uint64_t		uf_addr; +	uint64_t		uf_sequence; + +}; + +int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, +		     struct amdgpu_job **job, struct amdgpu_vm *vm); +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, +			     struct amdgpu_job **job); + +void amdgpu_job_free_resources(struct amdgpu_job *job); +void amdgpu_job_free(struct amdgpu_job *job); +int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, +		      void *owner, struct dma_fence **f); +int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, +			     struct dma_fence **fence); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 91517b166a3b..bd98cc5fb97b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -328,61 +328,71 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  		case AMDGPU_HW_IP_GFX:  			type = AMD_IP_BLOCK_TYPE_GFX;  			for (i = 0; i < adev->gfx.num_gfx_rings; i++) -				ring_mask |= ((adev->gfx.gfx_ring[i].ready ? 1 : 0) << i); -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; -			ib_size_alignment = 8; +				ring_mask |= adev->gfx.gfx_ring[i].ready << i; +			ib_start_alignment = 32; +			ib_size_alignment = 32;  			break;  		case AMDGPU_HW_IP_COMPUTE:  			type = AMD_IP_BLOCK_TYPE_GFX;  			for (i = 0; i < adev->gfx.num_compute_rings; i++) -				ring_mask |= ((adev->gfx.compute_ring[i].ready ? 1 : 0) << i); -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; -			ib_size_alignment = 8; +				ring_mask |= adev->gfx.compute_ring[i].ready << i; +			ib_start_alignment = 32; +			ib_size_alignment = 32;  			break;  		case AMDGPU_HW_IP_DMA:  			type = AMD_IP_BLOCK_TYPE_SDMA;  			for (i = 0; i < adev->sdma.num_instances; i++) -				ring_mask |= ((adev->sdma.instance[i].ring.ready ? 1 : 0) << i); -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; -			ib_size_alignment = 1; +				ring_mask |= adev->sdma.instance[i].ring.ready << i; +			ib_start_alignment = 256; +			ib_size_alignment = 4;  			break;  		case AMDGPU_HW_IP_UVD:  			type = AMD_IP_BLOCK_TYPE_UVD; -			for (i = 0; i < adev->uvd.num_uvd_inst; i++) -				ring_mask |= ((adev->uvd.inst[i].ring.ready ? 
1 : 0) << i); -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; -			ib_size_alignment = 16; +			for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +				if (adev->uvd.harvest_config & (1 << i)) +					continue; +				ring_mask |= adev->uvd.inst[i].ring.ready; +			} +			ib_start_alignment = 64; +			ib_size_alignment = 64;  			break;  		case AMDGPU_HW_IP_VCE:  			type = AMD_IP_BLOCK_TYPE_VCE;  			for (i = 0; i < adev->vce.num_rings; i++) -				ring_mask |= ((adev->vce.ring[i].ready ? 1 : 0) << i); -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; +				ring_mask |= adev->vce.ring[i].ready << i; +			ib_start_alignment = 4;  			ib_size_alignment = 1;  			break;  		case AMDGPU_HW_IP_UVD_ENC:  			type = AMD_IP_BLOCK_TYPE_UVD; -			for (i = 0; i < adev->uvd.num_uvd_inst; i++) +			for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +				if (adev->uvd.harvest_config & (1 << i)) +					continue;  				for (j = 0; j < adev->uvd.num_enc_rings; j++) -					ring_mask |= -					((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) << -					(j + i * adev->uvd.num_enc_rings)); -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; -			ib_size_alignment = 1; +					ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j; +			} +			ib_start_alignment = 64; +			ib_size_alignment = 64;  			break;  		case AMDGPU_HW_IP_VCN_DEC:  			type = AMD_IP_BLOCK_TYPE_VCN; -			ring_mask = adev->vcn.ring_dec.ready ? 1 : 0; -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; +			ring_mask = adev->vcn.ring_dec.ready; +			ib_start_alignment = 16;  			ib_size_alignment = 16;  			break;  		case AMDGPU_HW_IP_VCN_ENC:  			type = AMD_IP_BLOCK_TYPE_VCN;  			for (i = 0; i < adev->vcn.num_enc_rings; i++) -				ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i); -			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; +				ring_mask |= adev->vcn.ring_enc[i].ready << i; +			ib_start_alignment = 64;  			ib_size_alignment = 1;  			break; +		case AMDGPU_HW_IP_VCN_JPEG: +			type = AMD_IP_BLOCK_TYPE_VCN; +			ring_mask = adev->vcn.ring_jpeg.ready; +			ib_start_alignment = 16; +			ib_size_alignment = 16; +			break;  		default:  			return -EINVAL;  		} @@ -427,6 +437,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  			break;  		case AMDGPU_HW_IP_VCN_DEC:  		case AMDGPU_HW_IP_VCN_ENC: +		case AMDGPU_HW_IP_VCN_JPEG:  			type = AMD_IP_BLOCK_TYPE_VCN;  			break;  		default: @@ -494,13 +505,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  	case AMDGPU_INFO_VRAM_GTT: {  		struct drm_amdgpu_info_vram_gtt vram_gtt; -		vram_gtt.vram_size = adev->gmc.real_vram_size; -		vram_gtt.vram_size -= adev->vram_pin_size; -		vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size; -		vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); +		vram_gtt.vram_size = adev->gmc.real_vram_size - +			atomic64_read(&adev->vram_pin_size); +		vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - +			atomic64_read(&adev->visible_pin_size);  		vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;  		vram_gtt.gtt_size *= PAGE_SIZE; -		vram_gtt.gtt_size -= adev->gart_pin_size; +		vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);  		return copy_to_user(out, &vram_gtt,  				    min((size_t)size, sizeof(vram_gtt))) ? 
-EFAULT : 0;  	} @@ -509,17 +520,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  		memset(&mem, 0, sizeof(mem));  		mem.vram.total_heap_size = adev->gmc.real_vram_size; -		mem.vram.usable_heap_size = -			adev->gmc.real_vram_size - adev->vram_pin_size; +		mem.vram.usable_heap_size = adev->gmc.real_vram_size - +			atomic64_read(&adev->vram_pin_size);  		mem.vram.heap_usage =  			amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);  		mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;  		mem.cpu_accessible_vram.total_heap_size =  			adev->gmc.visible_vram_size; -		mem.cpu_accessible_vram.usable_heap_size = -			adev->gmc.visible_vram_size - -			(adev->vram_pin_size - adev->invisible_pin_size); +		mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size - +			atomic64_read(&adev->visible_pin_size);  		mem.cpu_accessible_vram.heap_usage =  			amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);  		mem.cpu_accessible_vram.max_allocation = @@ -527,8 +537,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file  		mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size;  		mem.gtt.total_heap_size *= PAGE_SIZE; -		mem.gtt.usable_heap_size = mem.gtt.total_heap_size -			- adev->gart_pin_size; +		mem.gtt.usable_heap_size = mem.gtt.total_heap_size - +			atomic64_read(&adev->gart_pin_size);  		mem.gtt.heap_usage =  			amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);  		mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; @@ -930,7 +940,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,  		return;  	pm_runtime_get_sync(dev->dev); -	amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);  	if (adev->asic_type != CHIP_RAVEN) {  		amdgpu_uvd_free_handles(adev, file_priv); @@ -958,7 +967,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,  	amdgpu_bo_unref(&pd);  	idr_for_each_entry(&fpriv->bo_list_handles, list, handle) -		amdgpu_bo_list_free(list); +		amdgpu_bo_list_put(list);  	idr_destroy(&fpriv->bo_list_handles);  	mutex_destroy(&fpriv->bo_list_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 83e344fbb50a..a365ea2383d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -28,6 +28,21 @@   *    Christian König <[email protected]>   */ +/** + * DOC: MMU Notifier + * + * For coherent userptr handling, the driver registers an MMU notifier to be + * informed about updates on the page tables of a process. + * + * When somebody tries to invalidate the page tables, we block the update until + * all operations on the pages in question are completed; those pages are then + * marked as accessed and, if it wasn't a read-only access, also as dirty. + * + * New command submissions using the userptrs in question are delayed until all + * page table invalidations are completed and we once more see a coherent process + * address space. 
+ */ +  #include <linux/firmware.h>  #include <linux/module.h>  #include <linux/mmu_notifier.h> @@ -38,6 +53,22 @@  #include "amdgpu.h"  #include "amdgpu_amdkfd.h" +/** + * struct amdgpu_mn + * + * @adev: amdgpu device pointer + * @mm: process address space + * @mn: MMU notifier structure + * @type: type of MMU notifier + * @work: destruction work item + * @node: hash table node to find structure by adev and mn + * @lock: rw semaphore protecting the notifier nodes + * @objects: interval tree containing amdgpu_mn_nodes + * @read_lock: mutex for recursive locking of @lock + * @recursion: depth of recursion + * + * Data for each amdgpu device and process address space. + */  struct amdgpu_mn {  	/* constant after initialisation */  	struct amdgpu_device	*adev; @@ -58,13 +89,21 @@ struct amdgpu_mn {  	atomic_t		recursion;  }; +/** + * struct amdgpu_mn_node + * + * @it: interval node defining start-last of the affected address range + * @bos: list of all BOs in the affected address range + * + * Manages all BOs which are affected of a certain range of address space. + */  struct amdgpu_mn_node {  	struct interval_tree_node	it;  	struct list_head		bos;  };  /** - * amdgpu_mn_destroy - destroy the rmn + * amdgpu_mn_destroy - destroy the MMU notifier   *   * @work: previously sheduled work item   * @@ -72,47 +111,50 @@ struct amdgpu_mn_node {   */  static void amdgpu_mn_destroy(struct work_struct *work)  { -	struct amdgpu_mn *rmn = container_of(work, struct amdgpu_mn, work); -	struct amdgpu_device *adev = rmn->adev; +	struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); +	struct amdgpu_device *adev = amn->adev;  	struct amdgpu_mn_node *node, *next_node;  	struct amdgpu_bo *bo, *next_bo;  	mutex_lock(&adev->mn_lock); -	down_write(&rmn->lock); -	hash_del(&rmn->node); +	down_write(&amn->lock); +	hash_del(&amn->node);  	rbtree_postorder_for_each_entry_safe(node, next_node, -					     &rmn->objects.rb_root, it.rb) { +					     &amn->objects.rb_root, it.rb) {  		list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {  			bo->mn = NULL;  			list_del_init(&bo->mn_list);  		}  		kfree(node);  	} -	up_write(&rmn->lock); +	up_write(&amn->lock);  	mutex_unlock(&adev->mn_lock); -	mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); -	kfree(rmn); +	mmu_notifier_unregister_no_release(&amn->mn, amn->mm); +	kfree(amn);  }  /**   * amdgpu_mn_release - callback to notify about mm destruction   *   * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about   *   * Shedule a work item to lazy destroy our notifier.   
*/  static void amdgpu_mn_release(struct mmu_notifier *mn,  			      struct mm_struct *mm)  { -	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); -	INIT_WORK(&rmn->work, amdgpu_mn_destroy); -	schedule_work(&rmn->work); +	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + +	INIT_WORK(&amn->work, amdgpu_mn_destroy); +	schedule_work(&amn->work);  }  /** - * amdgpu_mn_lock - take the write side lock for this mn + * amdgpu_mn_lock - take the write side lock for this notifier + * + * @mn: our notifier   */  void amdgpu_mn_lock(struct amdgpu_mn *mn)  { @@ -121,7 +163,9 @@ void amdgpu_mn_lock(struct amdgpu_mn *mn)  }  /** - * amdgpu_mn_unlock - drop the write side lock for this mn + * amdgpu_mn_unlock - drop the write side lock for this notifier + * + * @mn: our notifier   */  void amdgpu_mn_unlock(struct amdgpu_mn *mn)  { @@ -130,40 +174,38 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)  }  /** - * amdgpu_mn_read_lock - take the rmn read lock - * - * @rmn: our notifier + * amdgpu_mn_read_lock - take the read side lock for this notifier   * - * Take the rmn read side lock. + * @amn: our notifier   */ -static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)  { -	mutex_lock(&rmn->read_lock); -	if (atomic_inc_return(&rmn->recursion) == 1) -		down_read_non_owner(&rmn->lock); -	mutex_unlock(&rmn->read_lock); +	mutex_lock(&amn->read_lock); +	if (atomic_inc_return(&amn->recursion) == 1) +		down_read_non_owner(&amn->lock); +	mutex_unlock(&amn->read_lock);  }  /** - * amdgpu_mn_read_unlock - drop the rmn read lock + * amdgpu_mn_read_unlock - drop the read side lock for this notifier   * - * @rmn: our notifier - * - * Drop the rmn read side lock. + * @amn: our notifier   */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)  { -	if (atomic_dec_return(&rmn->recursion) == 0) -		up_read_non_owner(&rmn->lock); +	if (atomic_dec_return(&amn->recursion) == 0) +		up_read_non_owner(&amn->lock);  }  /**   * amdgpu_mn_invalidate_node - unmap all BOs of a node   *   * @node: the node with the BOs to unmap + * @start: start of address range affected + * @end: end of address range affected   * - * We block for all BOs and unmap them by move them - * into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty.   */  static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,  				      unsigned long start, @@ -190,27 +232,27 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,   * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change   *   * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about   * @start: start of updated range   * @end: end of updated range   * - * We block for all BOs between start and end to be idle and - * unmap them by move them into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty.   
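amdgpu_mn_read_lock()/amdgpu_mn_read_unlock() above only take the underlying rwsem for the outermost of possibly nested invocations, tracked by the recursion counter. A user-space sketch of the same idea with C11 atomics and a pthread rwlock (illustrative; the driver additionally serializes lockers with its read_lock mutex to close the window between bumping the counter and taking the semaphore):

#include <pthread.h>
#include <stdatomic.h>

static pthread_rwlock_t range_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_int recursion;

static void read_lock_recursive(void)
{
	/* Only the outermost caller really acquires the read lock. */
	if (atomic_fetch_add(&recursion, 1) == 0)
		pthread_rwlock_rdlock(&range_lock);
}

static void read_unlock_recursive(void)
{
	/* The matching outermost unlock releases it again. */
	if (atomic_fetch_sub(&recursion, 1) == 1)
		pthread_rwlock_unlock(&range_lock);
}

int main(void)
{
	read_lock_recursive();
	read_lock_recursive();	/* nested: no second rdlock is taken */
	read_unlock_recursive();
	read_unlock_recursive();
	return 0;
}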
*/  static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,  						 struct mm_struct *mm,  						 unsigned long start,  						 unsigned long end)  { -	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); +	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);  	struct interval_tree_node *it;  	/* notification is exclusive, but interval is inclusive */  	end -= 1; -	amdgpu_mn_read_lock(rmn); +	amdgpu_mn_read_lock(amn); -	it = interval_tree_iter_first(&rmn->objects, start, end); +	it = interval_tree_iter_first(&amn->objects, start, end);  	while (it) {  		struct amdgpu_mn_node *node; @@ -225,7 +267,7 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,   * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change   *   * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about   * @start: start of updated range   * @end: end of updated range   * @@ -238,15 +280,15 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,  						 unsigned long start,  						 unsigned long end)  { -	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); +	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);  	struct interval_tree_node *it;  	/* notification is exclusive, but interval is inclusive */  	end -= 1; -	amdgpu_mn_read_lock(rmn); +	amdgpu_mn_read_lock(amn); -	it = interval_tree_iter_first(&rmn->objects, start, end); +	it = interval_tree_iter_first(&amn->objects, start, end);  	while (it) {  		struct amdgpu_mn_node *node;  		struct amdgpu_bo *bo; @@ -268,7 +310,7 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,   * amdgpu_mn_invalidate_range_end - callback to notify about mm change   *   * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about   * @start: start of updated range   * @end: end of updated range   * @@ -279,9 +321,9 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,  					   unsigned long start,  					   unsigned long end)  { -	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); +	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); -	amdgpu_mn_read_unlock(rmn); +	amdgpu_mn_read_unlock(amn);  }  static const struct mmu_notifier_ops amdgpu_mn_ops[] = { @@ -315,7 +357,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,  				enum amdgpu_mn_type type)  {  	struct mm_struct *mm = current->mm; -	struct amdgpu_mn *rmn; +	struct amdgpu_mn *amn;  	unsigned long key = AMDGPU_MN_KEY(mm, type);  	int r; @@ -325,41 +367,41 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,  		return ERR_PTR(-EINTR);  	} -	hash_for_each_possible(adev->mn_hash, rmn, node, key) -		if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) +	hash_for_each_possible(adev->mn_hash, amn, node, key) +		if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)  			goto release_locks; -	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); -	if (!rmn) { -		rmn = ERR_PTR(-ENOMEM); +	amn = kzalloc(sizeof(*amn), GFP_KERNEL); +	if (!amn) { +		amn = ERR_PTR(-ENOMEM);  		goto release_locks;  	} -	rmn->adev = adev; -	rmn->mm = mm; -	init_rwsem(&rmn->lock); -	rmn->type = type; -	rmn->mn.ops = &amdgpu_mn_ops[type]; -	rmn->objects = RB_ROOT_CACHED; -	mutex_init(&rmn->read_lock); -	atomic_set(&rmn->recursion, 0); +	amn->adev = adev; +	amn->mm = mm; +	init_rwsem(&amn->lock); +	amn->type = type; +	amn->mn.ops = &amdgpu_mn_ops[type]; +	amn->objects = RB_ROOT_CACHED; +	
mutex_init(&amn->read_lock); +	atomic_set(&amn->recursion, 0); -	r = __mmu_notifier_register(&rmn->mn, mm); +	r = __mmu_notifier_register(&amn->mn, mm);  	if (r) -		goto free_rmn; +		goto free_amn; -	hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); +	hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));  release_locks:  	up_write(&mm->mmap_sem);  	mutex_unlock(&adev->mn_lock); -	return rmn; +	return amn; -free_rmn: +free_amn:  	up_write(&mm->mmap_sem);  	mutex_unlock(&adev->mn_lock); -	kfree(rmn); +	kfree(amn);  	return ERR_PTR(r);  } @@ -379,14 +421,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);  	enum amdgpu_mn_type type =  		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; -	struct amdgpu_mn *rmn; +	struct amdgpu_mn *amn;  	struct amdgpu_mn_node *node = NULL, *new_node;  	struct list_head bos;  	struct interval_tree_node *it; -	rmn = amdgpu_mn_get(adev, type); -	if (IS_ERR(rmn)) -		return PTR_ERR(rmn); +	amn = amdgpu_mn_get(adev, type); +	if (IS_ERR(amn)) +		return PTR_ERR(amn);  	new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);  	if (!new_node) @@ -394,12 +436,12 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)  	INIT_LIST_HEAD(&bos); -	down_write(&rmn->lock); +	down_write(&amn->lock); -	while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { +	while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {  		kfree(node);  		node = container_of(it, struct amdgpu_mn_node, it); -		interval_tree_remove(&node->it, &rmn->objects); +		interval_tree_remove(&node->it, &amn->objects);  		addr = min(it->start, addr);  		end = max(it->last, end);  		list_splice(&node->bos, &bos); @@ -410,7 +452,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)  	else  		kfree(new_node); -	bo->mn = rmn; +	bo->mn = amn;  	node->it.start = addr;  	node->it.last = end; @@ -418,9 +460,9 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)  	list_splice(&bos, &node->bos);  	list_add(&bo->mn_list, &node->bos); -	interval_tree_insert(&node->it, &rmn->objects); +	interval_tree_insert(&node->it, &amn->objects); -	up_write(&rmn->lock); +	up_write(&amn->lock);  	return 0;  } @@ -435,18 +477,18 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)  void amdgpu_mn_unregister(struct amdgpu_bo *bo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); -	struct amdgpu_mn *rmn; +	struct amdgpu_mn *amn;  	struct list_head *head;  	mutex_lock(&adev->mn_lock); -	rmn = bo->mn; -	if (rmn == NULL) { +	amn = bo->mn; +	if (amn == NULL) {  		mutex_unlock(&adev->mn_lock);  		return;  	} -	down_write(&rmn->lock); +	down_write(&amn->lock);  	/* save the next list entry for later */  	head = bo->mn_list.next; @@ -456,12 +498,13 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)  	if (list_empty(head)) {  		struct amdgpu_mn_node *node; +  		node = container_of(head, struct amdgpu_mn_node, bos); -		interval_tree_remove(&node->it, &rmn->objects); +		interval_tree_remove(&node->it, &amn->objects);  		kfree(node);  	} -	up_write(&rmn->lock); +	up_write(&amn->lock);  	mutex_unlock(&adev->mn_lock);  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3526efa8960e..b0e14a3d54ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -38,7 +38,20 @@  #include "amdgpu_trace.h"  #include "amdgpu_amdkfd.h" -static bool 
amdgpu_need_backup(struct amdgpu_device *adev) +/** + * DOC: amdgpu_object + * + * This defines the interfaces to operate on an &amdgpu_bo buffer object which + * represents memory used by driver (VRAM, system memory, etc.). The driver + * provides DRM/GEM APIs to userspace. DRM/GEM APIs then use these interfaces + * to create/destroy/set buffer object which are then managed by the kernel TTM + * memory manager. + * The interfaces are also used internally by kernel clients, including gfx, + * uvd, etc. for kernel managed allocations used by the GPU. + * + */ + +static bool amdgpu_bo_need_backup(struct amdgpu_device *adev)  {  	if (adev->flags & AMD_IS_APU)  		return false; @@ -50,11 +63,35 @@ static bool amdgpu_need_backup(struct amdgpu_device *adev)  	return true;  } -static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) +/** + * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting + * + * @bo: &amdgpu_bo buffer object + * + * This function is called when a BO stops being pinned, and updates the + * &amdgpu_device pin_size values accordingly. + */ +static void amdgpu_bo_subtract_pin_size(struct amdgpu_bo *bo) +{ +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + +	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { +		atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size); +		atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo), +			     &adev->visible_pin_size); +	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) { +		atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); +	} +} + +static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);  	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); +	if (bo->pin_count > 0) +		amdgpu_bo_subtract_pin_size(bo); +  	if (bo->kfd_bo)  		amdgpu_amdkfd_unreserve_system_memory_limit(bo); @@ -73,14 +110,32 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)  	kfree(bo);  } -bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) +/** + * amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo + * @bo: buffer object to be checked + * + * Uses destroy function associated with the object to determine if this is + * an &amdgpu_bo. + * + * Returns: + * true if the object belongs to &amdgpu_bo, false if not. + */ +bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)  { -	if (bo->destroy == &amdgpu_ttm_bo_destroy) +	if (bo->destroy == &amdgpu_bo_destroy)  		return true;  	return false;  } -void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) +/** + * amdgpu_bo_placement_from_domain - set buffer's placement + * @abo: &amdgpu_bo buffer object whose placement is to be set + * @domain: requested domain + * + * Sets buffer's placement according to requested domain and the buffer's + * flags. + */ +void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);  	struct ttm_placement *placement = &abo->placement; @@ -161,6 +216,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)  		c++;  	} +	BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS); +  	placement->num_placement = c;  	placement->placement = places; @@ -184,7 +241,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)   *   * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.   * - * Returns 0 on success, negative error code otherwise. + * Returns: + * 0 on success, negative error code otherwise.   
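The pin accounting above now lives in plain atomic counters (vram_pin_size, visible_pin_size, gart_pin_size) that are adjusted as BOs are pinned and unpinned, and read lock-free when reporting heap sizes to userspace. A small user-space model of that bookkeeping; the sizes are made up:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t vram_pin_size;		/* stand-in for adev->vram_pin_size */

static void pin_bo(uint64_t bo_size)
{
	atomic_fetch_add(&vram_pin_size, bo_size);
}

static void unpin_bo(uint64_t bo_size)
{
	atomic_fetch_sub(&vram_pin_size, bo_size);
}

int main(void)
{
	const uint64_t real_vram_size = 8ull << 30;	/* pretend 8 GiB of VRAM */
	uint64_t usable;

	pin_bo(64ull << 20);				/* pin a 64 MiB framebuffer */
	usable = real_vram_size - atomic_load(&vram_pin_size);
	printf("usable heap: %llu MiB\n", (unsigned long long)(usable >> 20));
	printf("max allocation: %llu MiB\n",
	       (unsigned long long)((usable * 3 / 4) >> 20));
	unpin_bo(64ull << 20);
	return 0;
}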
*/  int amdgpu_bo_create_reserved(struct amdgpu_device *adev,  			      unsigned long size, int align, @@ -220,22 +278,33 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,  		goto error_free;  	} -	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr); +	r = amdgpu_bo_pin(*bo_ptr, domain);  	if (r) {  		dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);  		goto error_unreserve;  	} +	r = amdgpu_ttm_alloc_gart(&(*bo_ptr)->tbo); +	if (r) { +		dev_err(adev->dev, "%p bind failed\n", *bo_ptr); +		goto error_unpin; +	} + +	if (gpu_addr) +		*gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr); +  	if (cpu_addr) {  		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);  		if (r) {  			dev_err(adev->dev, "(%d) kernel bo map failed\n", r); -			goto error_unreserve; +			goto error_unpin;  		}  	}  	return 0; +error_unpin: +	amdgpu_bo_unpin(*bo_ptr);  error_unreserve:  	amdgpu_bo_unreserve(*bo_ptr); @@ -261,7 +330,8 @@ error_free:   *   * Note: For bo_ptr new BO is only created if bo_ptr points to NULL.   * - * Returns 0 on success, negative error code otherwise. + * Returns: + * 0 on success, negative error code otherwise.   */  int amdgpu_bo_create_kernel(struct amdgpu_device *adev,  			    unsigned long size, int align, @@ -285,6 +355,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,   * amdgpu_bo_free_kernel - free BO for kernel use   *   * @bo: amdgpu BO to free + * @gpu_addr: pointer to where the BO's GPU memory space address was stored + * @cpu_addr: pointer to where the BO's CPU memory space address was stored   *   * unmaps and unpin a BO for kernel internal use.   */ @@ -418,17 +490,17 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,  #endif  	bo->tbo.bdev = &adev->mman.bdev; -	amdgpu_ttm_placement_from_domain(bo, bp->domain); +	amdgpu_bo_placement_from_domain(bo, bp->domain);  	if (bp->type == ttm_bo_type_kernel)  		bo->tbo.priority = 1;  	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,  				 &bo->placement, page_align, &ctx, acc_size, -				 NULL, bp->resv, &amdgpu_ttm_bo_destroy); +				 NULL, bp->resv, &amdgpu_bo_destroy);  	if (unlikely(r != 0))  		return r; -	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && +	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&  	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&  	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)  		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, @@ -498,6 +570,20 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,  	return r;  } +/** + * amdgpu_bo_create - create an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @bp: parameters to be used for the buffer object + * @bo_ptr: pointer to the buffer object pointer + * + * Creates an &amdgpu_bo buffer object; and if requested, also creates a + * shadow object. + * Shadow object is used to backup the original buffer object, and is always + * in GTT. + * + * Returns: + * 0 for success or a negative error code on failure. 
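amdgpu_bo_create_reserved() above now pins the BO, binds it into the GART and only then derives the GPU address from amdgpu_bo_gpu_offset(). A hedged sketch of a typical kernel-internal allocation through the amdgpu_bo_create_kernel()/amdgpu_bo_free_kernel() pair (fragment, error handling trimmed, size and domain illustrative):

struct amdgpu_bo *bo = NULL;
u64 gpu_addr = 0;
void *cpu_addr = NULL;
int r;

r = amdgpu_bo_create_kernel(adev, 4096, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
			    &bo, &gpu_addr, &cpu_addr);
if (r)
	return r;

/* ... use cpu_addr for CPU writes and gpu_addr in command buffers ... */

amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_addr);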
+ */  int amdgpu_bo_create(struct amdgpu_device *adev,  		     struct amdgpu_bo_param *bp,  		     struct amdgpu_bo **bo_ptr) @@ -510,7 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,  	if (r)  		return r; -	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { +	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) {  		if (!bp->resv)  			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,  							NULL)); @@ -527,6 +613,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev,  	return r;  } +/** + * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @ring: amdgpu_ring for the engine handling the buffer operations + * @bo: &amdgpu_bo buffer to be backed up + * @resv: reservation object with embedded fence + * @fence: dma_fence associated with the operation + * @direct: whether to submit the job directly + * + * Copies an &amdgpu_bo buffer object to its shadow object. + * Not used for now. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,  			       struct amdgpu_ring *ring,  			       struct amdgpu_bo *bo, @@ -559,6 +660,18 @@ err:  	return r;  } +/** + * amdgpu_bo_validate - validate an &amdgpu_bo buffer object + * @bo: pointer to the buffer object + * + * Sets placement according to domain; and changes placement and caching + * policy of the buffer object according to the placement. + * This is used for validating shadow bos.  It calls ttm_bo_validate() to + * make sure the buffer is resident where it needs to be. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_validate(struct amdgpu_bo *bo)  {  	struct ttm_operation_ctx ctx = { false, false }; @@ -571,7 +684,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)  	domain = bo->preferred_domains;  retry: -	amdgpu_ttm_placement_from_domain(bo, domain); +	amdgpu_bo_placement_from_domain(bo, domain);  	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {  		domain = bo->allowed_domains; @@ -581,6 +694,22 @@ retry:  	return r;  } +/** + * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @ring: amdgpu_ring for the engine handling the buffer operations + * @bo: &amdgpu_bo buffer to be restored + * @resv: reservation object with embedded fence + * @fence: dma_fence associated with the operation + * @direct: whether to submit the job directly + * + * Copies a buffer object's shadow content back to the object. + * This is used for recovering a buffer from its shadow in case of a gpu + * reset where vram context may be lost. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,  				  struct amdgpu_ring *ring,  				  struct amdgpu_bo *bo, @@ -613,6 +742,17 @@ err:  	return r;  } +/** + * amdgpu_bo_kmap - map an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be mapped + * @ptr: kernel virtual address to be returned + * + * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls + * amdgpu_bo_kptr() to get the kernel virtual address. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */  int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)  {  	void *kptr; @@ -643,6 +783,15 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)  	return 0;  } +/** + * amdgpu_bo_kptr - returns a kernel virtual address of the buffer object + * @bo: &amdgpu_bo buffer object + * + * Calls ttm_kmap_obj_virtual() to get the kernel virtual address + * + * Returns: + * the virtual address of a buffer object area. + */  void *amdgpu_bo_kptr(struct amdgpu_bo *bo)  {  	bool is_iomem; @@ -650,21 +799,42 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo)  	return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);  } +/** + * amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be unmapped + * + * Unmaps a kernel map set up by amdgpu_bo_kmap(). + */  void amdgpu_bo_kunmap(struct amdgpu_bo *bo)  {  	if (bo->kmap.bo)  		ttm_bo_kunmap(&bo->kmap);  } +/** + * amdgpu_bo_ref - reference an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object + * + * References the contained &ttm_buffer_object. + * + * Returns: + * a refcounted pointer to the &amdgpu_bo buffer object. + */  struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)  {  	if (bo == NULL)  		return NULL; -	ttm_bo_reference(&bo->tbo); +	ttm_bo_get(&bo->tbo);  	return bo;  } +/** + * amdgpu_bo_unref - unreference an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object + * + * Unreferences the contained &ttm_buffer_object and clear the pointer + */  void amdgpu_bo_unref(struct amdgpu_bo **bo)  {  	struct ttm_buffer_object *tbo; @@ -673,14 +843,34 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)  		return;  	tbo = &((*bo)->tbo); -	ttm_bo_unref(&tbo); -	if (tbo == NULL) -		*bo = NULL; +	ttm_bo_put(tbo); +	*bo = NULL;  } +/** + * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be pinned + * @domain: domain to be pinned to + * @min_offset: the start of requested address range + * @max_offset: the end of requested address range + * + * Pins the buffer object according to requested domain and address range. If + * the memory is unbound gart memory, binds the pages into gart table. Adjusts + * pin_count and pin_size accordingly. + * + * Pinning means to lock pages in memory along with keeping them at a fixed + * offset. It is required when a buffer can not be moved, for example, when + * a display buffer is being scanned out. + * + * Compared with amdgpu_bo_pin(), this function gives more flexibility on + * where to pin a buffer if there are specific restrictions on where a buffer + * must be located. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */  int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, -			     u64 min_offset, u64 max_offset, -			     u64 *gpu_addr) +			     u64 min_offset, u64 max_offset)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);  	struct ttm_operation_ctx ctx = { false, false }; @@ -712,8 +902,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,  			return -EINVAL;  		bo->pin_count++; -		if (gpu_addr) -			*gpu_addr = amdgpu_bo_gpu_offset(bo);  		if (max_offset != 0) {  			u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset; @@ -728,7 +916,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,  	/* force to pin into visible video ram */  	if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))  		bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; -	amdgpu_ttm_placement_from_domain(bo, domain); +	amdgpu_bo_placement_from_domain(bo, domain);  	for (i = 0; i < bo->placement.num_placement; i++) {  		unsigned fpfn, lpfn; @@ -749,33 +937,48 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,  		goto error;  	} -	r = amdgpu_ttm_alloc_gart(&bo->tbo); -	if (unlikely(r)) { -		dev_err(adev->dev, "%p bind failed\n", bo); -		goto error; -	} -  	bo->pin_count = 1; -	if (gpu_addr != NULL) -		*gpu_addr = amdgpu_bo_gpu_offset(bo);  	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);  	if (domain == AMDGPU_GEM_DOMAIN_VRAM) { -		adev->vram_pin_size += amdgpu_bo_size(bo); -		adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo); +		atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size); +		atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo), +			     &adev->visible_pin_size);  	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) { -		adev->gart_pin_size += amdgpu_bo_size(bo); +		atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);  	}  error:  	return r;  } -int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr) +/** + * amdgpu_bo_pin - pin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be pinned + * @domain: domain to be pinned to + * + * A simple wrapper to amdgpu_bo_pin_restricted(). + * Provides a simpler API for buffers that do not have any strict restrictions + * on where a buffer must be located. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)  { -	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr); +	return amdgpu_bo_pin_restricted(bo, domain, 0, 0);  } +/** + * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be unpinned + * + * Decreases the pin_count, and clears the flags if pin_count reaches 0. + * Changes placement and pin size accordingly. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */  int amdgpu_bo_unpin(struct amdgpu_bo *bo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -790,12 +993,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)  	if (bo->pin_count)  		return 0; -	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { -		adev->vram_pin_size -= amdgpu_bo_size(bo); -		adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo); -	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) { -		adev->gart_pin_size -= amdgpu_bo_size(bo); -	} +	amdgpu_bo_subtract_pin_size(bo);  	for (i = 0; i < bo->placement.num_placement; i++) {  		bo->placements[i].lpfn = 0; @@ -808,6 +1006,16 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)  	return r;  } +/** + * amdgpu_bo_evict_vram - evict VRAM buffers + * @adev: amdgpu device object + * + * Evicts all VRAM buffers on the lru list of the memory type. + * Mainly used for evicting vram at suspend time. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_evict_vram(struct amdgpu_device *adev)  {  	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ @@ -830,6 +1038,15 @@ static const char *amdgpu_vram_names[] = {  	"DDR4",  }; +/** + * amdgpu_bo_init - initialize memory manager + * @adev: amdgpu device object + * + * Calls amdgpu_ttm_init() to initialize amdgpu memory manager. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_init(struct amdgpu_device *adev)  {  	/* reserve PAT memory space to WC for VRAM */ @@ -847,6 +1064,16 @@ int amdgpu_bo_init(struct amdgpu_device *adev)  	return amdgpu_ttm_init(adev);  } +/** + * amdgpu_bo_late_init - late init + * @adev: amdgpu device object + * + * Calls amdgpu_ttm_late_init() to free resources used earlier during + * initialization. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_late_init(struct amdgpu_device *adev)  {  	amdgpu_ttm_late_init(adev); @@ -854,6 +1081,12 @@ int amdgpu_bo_late_init(struct amdgpu_device *adev)  	return 0;  } +/** + * amdgpu_bo_fini - tear down memory manager + * @adev: amdgpu device object + * + * Reverses amdgpu_bo_init() to tear down memory manager. + */  void amdgpu_bo_fini(struct amdgpu_device *adev)  {  	amdgpu_ttm_fini(adev); @@ -861,12 +1094,33 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)  	arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);  } +/** + * amdgpu_bo_fbdev_mmap - mmap fbdev memory + * @bo: &amdgpu_bo buffer object + * @vma: vma as input from the fbdev mmap method + * + * Calls ttm_fbdev_mmap() to mmap fbdev memory if it is backed by a bo. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,  			     struct vm_area_struct *vma)  {  	return ttm_fbdev_mmap(vma, &bo->tbo);  } +/** + * amdgpu_bo_set_tiling_flags - set tiling flags + * @bo: &amdgpu_bo buffer object + * @tiling_flags: new flags + * + * Sets buffer object's tiling flags with the new one. Used by GEM ioctl or + * kernel driver to set the tiling flags on a buffer. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -879,6 +1133,14 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)  	return 0;  } +/** + * amdgpu_bo_get_tiling_flags - get tiling flags + * @bo: &amdgpu_bo buffer object + * @tiling_flags: returned flags + * + * Gets buffer object's tiling flags. 
Used by GEM ioctl or kernel driver to + * get the tiling flags on a buffer. + */  void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)  {  	lockdep_assert_held(&bo->tbo.resv->lock.base); @@ -887,6 +1149,19 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)  		*tiling_flags = bo->tiling_flags;  } +/** + * amdgpu_bo_set_metadata - set metadata + * @bo: &amdgpu_bo buffer object + * @metadata: new metadata + * @metadata_size: size of the new metadata + * @flags: flags of the new metadata + * + * Sets buffer object's metadata, its size and flags. + * Used via GEM ioctl. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,  			    uint32_t metadata_size, uint64_t flags)  { @@ -916,6 +1191,21 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,  	return 0;  } +/** + * amdgpu_bo_get_metadata - get metadata + * @bo: &amdgpu_bo buffer object + * @buffer: returned metadata + * @buffer_size: size of the buffer + * @metadata_size: size of the returned metadata + * @flags: flags of the returned metadata + * + * Gets buffer object's metadata, its size and flags. buffer_size shall not be + * less than metadata_size. + * Used via GEM ioctl. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,  			   size_t buffer_size, uint32_t *metadata_size,  			   uint64_t *flags) @@ -939,6 +1229,16 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,  	return 0;  } +/** + * amdgpu_bo_move_notify - notification about a memory move + * @bo: pointer to a buffer object + * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new information of the buffer object + * + * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs + * bookkeeping. + * TTM driver callback which is called when ttm moves a buffer. + */  void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,  			   bool evict,  			   struct ttm_mem_reg *new_mem) @@ -947,7 +1247,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,  	struct amdgpu_bo *abo;  	struct ttm_mem_reg *old_mem = &bo->mem; -	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) +	if (!amdgpu_bo_is_amdgpu_bo(bo))  		return;  	abo = ttm_to_amdgpu_bo(bo); @@ -964,9 +1264,20 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,  		return;  	/* move_notify is called before move happens */ -	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); +	trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);  } +/** + * amdgpu_bo_fault_reserve_notify - notification about a memory fault + * @bo: pointer to a buffer object + * + * Notifies the driver we are taking a fault on this BO and have reserved it, + * also performs bookkeeping. + * TTM driver callback for dealing with vm faults. + * + * Returns: + * 0 for success or a negative error code on failure. + */  int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); @@ -975,7 +1286,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)  	unsigned long offset, size;  	int r; -	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) +	if (!amdgpu_bo_is_amdgpu_bo(bo))  		return 0;  	abo = ttm_to_amdgpu_bo(bo); @@ -997,8 +1308,8 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)  	/* hurrah the memory is not visible ! 
*/  	atomic64_inc(&adev->num_vram_cpu_page_faults); -	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | -					 AMDGPU_GEM_DOMAIN_GTT); +	amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | +					AMDGPU_GEM_DOMAIN_GTT);  	/* Avoid costly evictions; only set GTT as a busy placement */  	abo->placement.num_busy_placement = 1; @@ -1040,10 +1351,11 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,   * amdgpu_bo_gpu_offset - return GPU offset of bo   * @bo:	amdgpu object for which we query the offset   * - * Returns current GPU offset of the object. - *   * Note: object should either be pinned or reserved when calling this   * function, it might be useful to add check for this for debugging. + * + * Returns: + * current GPU offset of the object.   */  u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)  { @@ -1059,6 +1371,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)  	return bo->tbo.offset;  } +/** + * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout + * @adev: amdgpu device object + * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>` + * + * Returns: + * Which of the allowed domains is preferred for pinning the BO for scanout. + */  uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,  					    uint32_t domain)  { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 731748033878..18945dd6982d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -32,6 +32,7 @@  #include "amdgpu.h"  #define AMDGPU_BO_INVALID_OFFSET	LONG_MAX +#define AMDGPU_BO_MAX_PLACEMENTS	3  struct amdgpu_bo_param {  	unsigned long			size; @@ -77,7 +78,7 @@ struct amdgpu_bo {  	/* Protected by tbo.reserved */  	u32				preferred_domains;  	u32				allowed_domains; -	struct ttm_place		placements[AMDGPU_GEM_DOMAIN_MAX + 1]; +	struct ttm_place		placements[AMDGPU_BO_MAX_PLACEMENTS];  	struct ttm_placement		placement;  	struct ttm_buffer_object	tbo;  	struct ttm_bo_kmap_obj		kmap; @@ -234,6 +235,9 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)  	return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;  } +bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); +void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); +  int amdgpu_bo_create(struct amdgpu_device *adev,  		     struct amdgpu_bo_param *bp,  		     struct amdgpu_bo **bo_ptr); @@ -252,10 +256,9 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo);  void amdgpu_bo_kunmap(struct amdgpu_bo *bo);  struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);  void amdgpu_bo_unref(struct amdgpu_bo **bo); -int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr); +int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);  int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, -			     u64 min_offset, u64 max_offset, -			     u64 *gpu_addr); +			     u64 min_offset, u64 max_offset);  int amdgpu_bo_unpin(struct amdgpu_bo *bo);  int amdgpu_bo_evict_vram(struct amdgpu_device *adev);  int amdgpu_bo_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index fc818b4d849c..8f98629fbe59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -31,7 +31,7 @@  #include <linux/power_supply.h>  #include <linux/hwmon.h>  #include <linux/hwmon-sysfs.h> - +#include <linux/nospec.h>  static int amdgpu_debugfs_pm_init(struct amdgpu_device 
*adev); @@ -68,11 +68,11 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)  	if (adev->pm.dpm_enabled) {  		mutex_lock(&adev->pm.mutex);  		if (power_supply_is_system_supplied() > 0) -			adev->pm.dpm.ac_power = true; +			adev->pm.ac_power = true;  		else -			adev->pm.dpm.ac_power = false; +			adev->pm.ac_power = false;  		if (adev->powerplay.pp_funcs->enable_bapm) -			amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power); +			amdgpu_dpm_enable_bapm(adev, adev->pm.ac_power);  		mutex_unlock(&adev->pm.mutex);  	}  } @@ -80,12 +80,15 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)  /**   * DOC: power_dpm_state   * - * This is a legacy interface and is only provided for backwards compatibility. - * The amdgpu driver provides a sysfs API for adjusting certain power - * related parameters.  The file power_dpm_state is used for this. + * The power_dpm_state file is a legacy interface and is only provided for + * backwards compatibility. The amdgpu driver provides a sysfs API for adjusting + * certain power related parameters.  The file power_dpm_state is used for this.   * It accepts the following arguments: + *   * - battery + *   * - balanced + *   * - performance   *   * battery @@ -169,14 +172,21 @@ fail:   * The amdgpu driver provides a sysfs API for adjusting certain power   * related parameters.  The file power_dpm_force_performance_level is   * used for this.  It accepts the following arguments: + *   * - auto + *   * - low + *   * - high + *   * - manual - * - GPU fan + *   * - profile_standard + *   * - profile_min_sclk + *   * - profile_min_mclk + *   * - profile_peak   *   * auto @@ -393,6 +403,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,  			count = -EINVAL;  			goto fail;  		} +		idx = array_index_nospec(idx, ARRAY_SIZE(data.states));  		amdgpu_dpm_get_pp_num_states(adev, &data);  		state = data.states[idx]; @@ -463,8 +474,11 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,   * this.   *   * Reading the file will display: + *   * - a list of engine clock levels and voltages labeled OD_SCLK + *   * - a list of memory clock levels and voltages labeled OD_MCLK + *   * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE   *   * To manually adjust these settings, first select manual using @@ -593,40 +607,59 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,  		return snprintf(buf, PAGE_SIZE, "\n");  } -static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, -		struct device_attribute *attr, -		const char *buf, -		size_t count) +/* + * Worst case: 32 bits individually specified, in octal at 12 characters + * per line (+1 for \n). 
+ */ +#define AMDGPU_MASK_BUF_MAX	(32 * 13) + +static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask)  { -	struct drm_device *ddev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = ddev->dev_private;  	int ret;  	long level; -	uint32_t mask = 0;  	char *sub_str = NULL;  	char *tmp; -	char buf_cpy[count]; +	char buf_cpy[AMDGPU_MASK_BUF_MAX + 1];  	const char delimiter[3] = {' ', '\n', '\0'}; +	size_t bytes; -	memcpy(buf_cpy, buf, count+1); +	*mask = 0; + +	bytes = min(count, sizeof(buf_cpy) - 1); +	memcpy(buf_cpy, buf, bytes); +	buf_cpy[bytes] = '\0';  	tmp = buf_cpy;  	while (tmp[0]) { -		sub_str =  strsep(&tmp, delimiter); +		sub_str = strsep(&tmp, delimiter);  		if (strlen(sub_str)) {  			ret = kstrtol(sub_str, 0, &level); - -			if (ret) { -				count = -EINVAL; -				goto fail; -			} -			mask |= 1 << level; +			if (ret) +				return -EINVAL; +			*mask |= 1 << level;  		} else  			break;  	} + +	return 0; +} + +static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, +		struct device_attribute *attr, +		const char *buf, +		size_t count) +{ +	struct drm_device *ddev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = ddev->dev_private; +	int ret; +	uint32_t mask = 0; + +	ret = amdgpu_read_mask(buf, count, &mask); +	if (ret) +		return ret; +  	if (adev->powerplay.pp_funcs->force_clock_level)  		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); -fail:  	return count;  } @@ -651,32 +684,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,  	struct drm_device *ddev = dev_get_drvdata(dev);  	struct amdgpu_device *adev = ddev->dev_private;  	int ret; -	long level;  	uint32_t mask = 0; -	char *sub_str = NULL; -	char *tmp; -	char buf_cpy[count]; -	const char delimiter[3] = {' ', '\n', '\0'}; -	memcpy(buf_cpy, buf, count+1); -	tmp = buf_cpy; -	while (tmp[0]) { -		sub_str =  strsep(&tmp, delimiter); -		if (strlen(sub_str)) { -			ret = kstrtol(sub_str, 0, &level); +	ret = amdgpu_read_mask(buf, count, &mask); +	if (ret) +		return ret; -			if (ret) { -				count = -EINVAL; -				goto fail; -			} -			mask |= 1 << level; -		} else -			break; -	}  	if (adev->powerplay.pp_funcs->force_clock_level)  		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); -fail:  	return count;  } @@ -701,33 +717,15 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,  	struct drm_device *ddev = dev_get_drvdata(dev);  	struct amdgpu_device *adev = ddev->dev_private;  	int ret; -	long level;  	uint32_t mask = 0; -	char *sub_str = NULL; -	char *tmp; -	char buf_cpy[count]; -	const char delimiter[3] = {' ', '\n', '\0'}; -	memcpy(buf_cpy, buf, count+1); -	tmp = buf_cpy; - -	while (tmp[0]) { -		sub_str =  strsep(&tmp, delimiter); -		if (strlen(sub_str)) { -			ret = kstrtol(sub_str, 0, &level); +	ret = amdgpu_read_mask(buf, count, &mask); +	if (ret) +		return ret; -			if (ret) { -				count = -EINVAL; -				goto fail; -			} -			mask |= 1 << level; -		} else -			break; -	}  	if (adev->powerplay.pp_funcs->force_clock_level)  		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); -fail:  	return count;  } @@ -905,6 +903,36 @@ fail:  	return -EINVAL;  } +/** + * DOC: busy_percent + * + * The amdgpu driver provides a sysfs API for reading how busy the GPU + * is as a percentage.  The file gpu_busy_percent is used for this. + * The SMU firmware computes a percentage of load based on the + * aggregate activity level in the IP cores. 
+ */ +static ssize_t amdgpu_get_busy_percent(struct device *dev, +		struct device_attribute *attr, +		char *buf) +{ +	struct drm_device *ddev = dev_get_drvdata(dev); +	struct amdgpu_device *adev = ddev->dev_private; +	int r, value, size = sizeof(value); + +	/* sanity check PP is enabled */ +	if (!(adev->powerplay.pp_funcs && +	      adev->powerplay.pp_funcs->read_sensor)) +		return -EINVAL; + +	/* read the IP busy sensor */ +	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, +				   (void *)&value, &size); +	if (r) +		return r; + +	return snprintf(buf, PAGE_SIZE, "%d\n", value); +} +  static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);  static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,  		   amdgpu_get_dpm_forced_performance_level, @@ -938,6 +966,8 @@ static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,  static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,  		amdgpu_get_pp_od_clk_voltage,  		amdgpu_set_pp_od_clk_voltage); +static DEVICE_ATTR(gpu_busy_percent, S_IRUGO, +		amdgpu_get_busy_percent, NULL);  static ssize_t amdgpu_hwmon_show_temp(struct device *dev,  				      struct device_attribute *attr, @@ -1156,7 +1186,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,  	int r, size = sizeof(vddnb);  	/* only APUs have vddnb */ -	if  (adev->flags & AMD_IS_APU) +	if  (!(adev->flags & AMD_IS_APU))  		return -EINVAL;  	/* Can't get voltage when the card is off */ @@ -1285,35 +1315,51 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,   * DOC: hwmon   *   * The amdgpu driver exposes the following sensor interfaces: + *   * - GPU temperature (via the on-die sensor) + *   * - GPU voltage + *   * - Northbridge voltage (APUs only) + *   * - GPU power + *   * - GPU fan   *   * hwmon interfaces for GPU temperature: + *   * - temp1_input: the on die GPU temperature in millidegrees Celsius + *   * - temp1_crit: temperature critical max value in millidegrees Celsius + *   * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius   *   * hwmon interfaces for GPU voltage: + *   * - in0_input: the voltage on the GPU in millivolts + *   * - in1_input: the voltage on the Northbridge in millivolts   *   * hwmon interfaces for GPU power: + *   * - power1_average: average power used by the GPU in microWatts + *   * - power1_cap_min: minimum cap supported in microWatts + *   * - power1_cap_max: maximum cap supported in microWatts + *   * - power1_cap: selected power cap in microWatts   *   * hwmon interfaces for GPU fan: + *   * - pwm1: pulse width modulation fan level (0-255) - * - pwm1_enable: pulse width modulation fan control method - *                0: no fan speed control - *                1: manual fan speed control using pwm interface - *                2: automatic fan speed control + * + * - pwm1_enable: pulse width modulation fan control method (0: no fan speed control, 1: manual fan speed control using pwm interface, 2: automatic fan speed control) + *   * - pwm1_min: pulse width modulation fan control minimum level (0) + *   * - pwm1_max: pulse width modulation fan control maximum level (255) + *   * - fan1_input: fan speed in RPM   *   * You can use hwmon tools like sensors to view this information on your system. 
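The sysfs and hwmon attributes described in the DOC sections above (gpu_busy_percent, temp1_input, and friends) are plain-text files, so userspace can read them with ordinary file I/O. As a rough usage sketch only, not part of this change: the snippet below reads the new gpu_busy_percent attribute and the on-die temperature; the card0/hwmon0 path components are assumptions and should be discovered at runtime (for example by scanning /sys/class/drm and the device's hwmon directory).

/*
 * Rough userspace sketch: read the gpu_busy_percent and hwmon temp1_input
 * attributes documented above.  The card0/hwmon0 paths are assumptions and
 * must be discovered on a real system.
 */
#include <stdio.h>
#include <stdlib.h>

static long read_sysfs_long(const char *path)
{
	FILE *f = fopen(path, "r");
	long val = -1;

	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	/* GPU load in percent, computed by the SMU firmware */
	long busy = read_sysfs_long("/sys/class/drm/card0/device/gpu_busy_percent");
	/* on-die temperature in millidegrees Celsius */
	long temp = read_sysfs_long("/sys/class/drm/card0/device/hwmon/hwmon0/temp1_input");

	printf("GPU load: %ld %%\n", busy);
	printf("GPU temperature: %ld.%03ld degC\n", temp / 1000, labs(temp) % 1000);
	return 0;
}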
@@ -1668,10 +1714,10 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)  void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)  { -	if (adev->powerplay.pp_funcs->powergate_uvd) { +	if (adev->powerplay.pp_funcs->set_powergating_by_smu) {  		/* enable/disable UVD */  		mutex_lock(&adev->pm.mutex); -		amdgpu_dpm_powergate_uvd(adev, !enable); +		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);  		mutex_unlock(&adev->pm.mutex);  	} else {  		if (enable) { @@ -1690,10 +1736,10 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)  void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)  { -	if (adev->powerplay.pp_funcs->powergate_vce) { +	if (adev->powerplay.pp_funcs->set_powergating_by_smu) {  		/* enable/disable VCE */  		mutex_lock(&adev->pm.mutex); -		amdgpu_dpm_powergate_vce(adev, !enable); +		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);  		mutex_unlock(&adev->pm.mutex);  	} else {  		if (enable) { @@ -1825,6 +1871,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)  				"pp_od_clk_voltage\n");  		return ret;  	} +	ret = device_create_file(adev->dev, +			&dev_attr_gpu_busy_percent); +	if (ret) { +		DRM_ERROR("failed to create device file	" +				"gpu_busy_level\n"); +		return ret; +	}  	ret = amdgpu_debugfs_pm_init(adev);  	if (ret) {  		DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -1860,6 +1913,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)  			&dev_attr_pp_power_profile_mode);  	device_remove_file(adev->dev,  			&dev_attr_pp_od_clk_voltage); +	device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);  }  void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -1878,6 +1932,14 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)  			amdgpu_fence_wait_empty(ring);  	} +	mutex_lock(&adev->pm.mutex); +	/* update battery/ac status */ +	if (power_supply_is_system_supplied() > 0) +		adev->pm.ac_power = true; +	else +		adev->pm.ac_power = false; +	mutex_unlock(&adev->pm.mutex); +  	if (adev->powerplay.pp_funcs->dispatch_tasks) {  		if (!amdgpu_device_has_dc_support(adev)) {  			mutex_lock(&adev->pm.mutex); @@ -1898,14 +1960,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)  	} else {  		mutex_lock(&adev->pm.mutex);  		amdgpu_dpm_get_active_displays(adev); -		/* update battery/ac status */ -		if (power_supply_is_system_supplied() > 0) -			adev->pm.dpm.ac_power = true; -		else -			adev->pm.dpm.ac_power = false; -  		amdgpu_dpm_change_power_state_locked(adev); -  		mutex_unlock(&adev->pm.mutex);  	}  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 4683626b065f..1c5d97f4b4dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -23,6 +23,14 @@   *   * Authors: Alex Deucher   */ + +/** + * DOC: PRIME Buffer Sharing + * + * The following callback implementations are used for :ref:`sharing GEM buffer + * objects between different devices via PRIME <prime_buffer_sharing>`. + */ +  #include <drm/drmP.h>  #include "amdgpu.h" @@ -32,6 +40,14 @@  static const struct dma_buf_ops amdgpu_dmabuf_ops; +/** + * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table + * implementation + * @obj: GEM buffer object + * + * Returns: + * A scatter/gather table for the pinned pages of the buffer object's memory. 
+ */  struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -40,6 +56,15 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)  	return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);  } +/** + * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation + * @obj: GEM buffer object + * + * Sets up an in-kernel virtual mapping of the buffer object's memory. + * + * Returns: + * The virtual address of the mapping or an error pointer. + */  void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -53,6 +78,13 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)  	return bo->dma_buf_vmap.virtual;  } +/** + * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation + * @obj: GEM buffer object + * @vaddr: virtual address (unused) + * + * Tears down the in-kernel virtual mapping of the buffer object's memory. + */  void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -60,6 +92,17 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)  	ttm_bo_kunmap(&bo->dma_buf_vmap);  } +/** + * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation + * @obj: GEM buffer object + * @vma: virtual memory area + * + * Sets up a userspace mapping of the buffer object's memory in the given + * virtual memory area. + * + * Returns: + * 0 on success or negative error code. + */  int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -94,6 +137,19 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma  	return ret;  } +/** + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table + * implementation + * @dev: DRM device + * @attach: DMA-buf attachment + * @sg: Scatter/gather table + * + * Import shared DMA buffer memory exported by another device. + * + * Returns: + * A new GEM buffer object of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. + */  struct drm_gem_object *  amdgpu_gem_prime_import_sg_table(struct drm_device *dev,  				 struct dma_buf_attachment *attach, @@ -132,8 +188,19 @@ error:  	return ERR_PTR(ret);  } +/** + * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation + * @dma_buf: shared DMA buffer + * @attach: DMA-buf attachment + * + * Makes sure that the shared DMA buffer can be accessed by the target device. + * For now, simply pins it to the GTT domain, where it should be accessible by + * all DMA devices. + * + * Returns: + * 0 on success or negative error code. 
+ */  static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, -				 struct device *target_dev,  				 struct dma_buf_attachment *attach)  {  	struct drm_gem_object *obj = dma_buf->priv; @@ -141,7 +208,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);  	long r; -	r = drm_gem_map_attach(dma_buf, target_dev, attach); +	r = drm_gem_map_attach(dma_buf, attach);  	if (r)  		return r; @@ -165,7 +232,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,  	}  	/* pin buffer into GTT */ -	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); +	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);  	if (r)  		goto error_unreserve; @@ -181,6 +248,14 @@ error_detach:  	return r;  } +/** + * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation + * @dma_buf: shared DMA buffer + * @attach: DMA-buf attachment + * + * This is called when a shared DMA buffer no longer needs to be accessible by + * the other device. For now, simply unpins the buffer from GTT. + */  static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,  				  struct dma_buf_attachment *attach)  { @@ -202,6 +277,13 @@ error:  	drm_gem_map_detach(dma_buf, attach);  } +/** + * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation + * @obj: GEM buffer object + * + * Returns: + * The buffer object's reservation object. + */  struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)  {  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -209,6 +291,18 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)  	return bo->tbo.resv;  } +/** + * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * @dma_buf: shared DMA buffer + * @direction: direction of DMA transfer + * + * This is called before CPU access to the shared DMA buffer's memory. If it's + * a read access, the buffer is moved to the GTT domain if possible, for optimal + * CPU read performance. + * + * Returns: + * 0 on success or negative error code. + */  static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,  				       enum dma_data_direction direction)  { @@ -229,7 +323,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,  		return ret;  	if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { -		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); +		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);  		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  	} @@ -245,14 +339,24 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {  	.release = drm_gem_dmabuf_release,  	.begin_cpu_access = amdgpu_gem_begin_cpu_access,  	.map = drm_gem_dmabuf_kmap, -	.map_atomic = drm_gem_dmabuf_kmap_atomic,  	.unmap = drm_gem_dmabuf_kunmap, -	.unmap_atomic = drm_gem_dmabuf_kunmap_atomic,  	.mmap = drm_gem_dmabuf_mmap,  	.vmap = drm_gem_dmabuf_vmap,  	.vunmap = drm_gem_dmabuf_vunmap,  }; +/** + * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation + * @dev: DRM device + * @gobj: GEM buffer object + * @flags: flags like DRM_CLOEXEC and DRM_RDWR + * + * The main work is done by the &drm_gem_prime_export helper, which in turn + * uses &amdgpu_gem_prime_res_obj. + * + * Returns: + * Shared DMA buffer representing the GEM buffer object from the given device. 
+ */  struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,  					struct drm_gem_object *gobj,  					int flags) @@ -273,6 +377,17 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,  	return buf;  } +/** + * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation + * @dev: DRM device + * @dma_buf: Shared DMA buffer + * + * The main work is done by the &drm_gem_prime_import helper, which in turn + * uses &amdgpu_gem_prime_import_sg_table. + * + * Returns: + * GEM buffer object representing the shared DMA buffer for the given device. + */  struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,  					    struct dma_buf *dma_buf)  { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9f1a5bd39ae8..5b39d1399630 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -131,6 +131,11 @@ psp_cmd_submit_buf(struct psp_context *psp,  		msleep(1);  	} +	if (ucode) { +		ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo; +		ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi; +	} +  	return ret;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 8af16e81c7d4..a172bba32b45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -66,8 +66,6 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,  			       u32 ring,  			       struct amdgpu_ring **out_ring)  { -	u32 instance; -  	switch (mapper->hw_ip) {  	case AMDGPU_HW_IP_GFX:  		*out_ring = &adev->gfx.gfx_ring[ring]; @@ -79,16 +77,13 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,  		*out_ring = &adev->sdma.instance[ring].ring;  		break;  	case AMDGPU_HW_IP_UVD: -		instance = ring; -		*out_ring = &adev->uvd.inst[instance].ring; +		*out_ring = &adev->uvd.inst[0].ring;  		break;  	case AMDGPU_HW_IP_VCE:  		*out_ring = &adev->vce.ring[ring];  		break;  	case AMDGPU_HW_IP_UVD_ENC: -		instance = ring / adev->uvd.num_enc_rings; -		*out_ring = -		&adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings]; +		*out_ring = &adev->uvd.inst[0].ring_enc[ring];  		break;  	case AMDGPU_HW_IP_VCN_DEC:  		*out_ring = &adev->vcn.ring_dec; @@ -96,6 +91,9 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,  	case AMDGPU_HW_IP_VCN_ENC:  		*out_ring = &adev->vcn.ring_enc[ring];  		break; +	case AMDGPU_HW_IP_VCN_JPEG: +		*out_ring = &adev->vcn.ring_jpeg; +		break;  	default:  		*out_ring = NULL;  		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); @@ -216,7 +214,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,  			 u32 hw_ip, u32 instance, u32 ring,  			 struct amdgpu_ring **out_ring)  { -	int r, ip_num_rings; +	int i, r, ip_num_rings = 0;  	struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];  	if (!adev || !mgr || !out_ring) @@ -245,14 +243,21 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,  		ip_num_rings = adev->sdma.num_instances;  		break;  	case AMDGPU_HW_IP_UVD: -		ip_num_rings = adev->uvd.num_uvd_inst; +		for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +			if (!(adev->uvd.harvest_config & (1 << i))) +				ip_num_rings++; +		}  		break;  	case AMDGPU_HW_IP_VCE:  		ip_num_rings = adev->vce.num_rings;  		break;  	case AMDGPU_HW_IP_UVD_ENC: +		for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +			if (!(adev->uvd.harvest_config & (1 << i))) +				ip_num_rings++; +		}  		ip_num_rings = -			adev->uvd.num_enc_rings * 
adev->uvd.num_uvd_inst; +			adev->uvd.num_enc_rings * ip_num_rings;  		break;  	case AMDGPU_HW_IP_VCN_DEC:  		ip_num_rings = 1; @@ -260,6 +265,9 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,  	case AMDGPU_HW_IP_VCN_ENC:  		ip_num_rings = adev->vcn.num_enc_rings;  		break; +	case AMDGPU_HW_IP_VCN_JPEG: +		ip_num_rings = 1; +		break;  	default:  		DRM_DEBUG("unknown ip type: %d\n", hw_ip);  		return -EINVAL; @@ -287,6 +295,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,  	case AMDGPU_HW_IP_UVD_ENC:  	case AMDGPU_HW_IP_VCN_DEC:  	case AMDGPU_HW_IP_VCN_ENC: +	case AMDGPU_HW_IP_VCN_JPEG:  		r = amdgpu_identity_map(adev, mapper, ring, out_ring);  		break;  	case AMDGPU_HW_IP_DMA: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index c6850b629d0e..93794a85f83d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -211,7 +211,8 @@ void amdgpu_ring_priority_get(struct amdgpu_ring *ring,  	if (!ring->funcs->set_priority)  		return; -	atomic_inc(&ring->num_jobs[priority]); +	if (atomic_inc_return(&ring->num_jobs[priority]) <= 0) +		return;  	mutex_lock(&ring->priority_mutex);  	if (priority <= ring->priority) @@ -304,7 +305,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,  		0xffffffffffffffff : ring->buf_mask;  	/* Allocate ring buffer */  	if (ring->ring_obj == NULL) { -		r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, +		r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,  					    AMDGPU_GEM_DOMAIN_GTT,  					    &ring->ring_obj,  					    &ring->gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1513124c5659..d242b9a51e90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -44,6 +44,8 @@  #define AMDGPU_FENCE_FLAG_INT           (1 << 1)  #define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2) +#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) +  enum amdgpu_ring_type {  	AMDGPU_RING_TYPE_GFX,  	AMDGPU_RING_TYPE_COMPUTE, @@ -53,7 +55,8 @@ enum amdgpu_ring_type {  	AMDGPU_RING_TYPE_KIQ,  	AMDGPU_RING_TYPE_UVD_ENC,  	AMDGPU_RING_TYPE_VCN_DEC, -	AMDGPU_RING_TYPE_VCN_ENC +	AMDGPU_RING_TYPE_VCN_ENC, +	AMDGPU_RING_TYPE_VCN_JPEG  };  struct amdgpu_device; @@ -112,6 +115,7 @@ struct amdgpu_ring_funcs {  	u32			nop;  	bool			support_64bit_ptrs;  	unsigned		vmhub; +	unsigned		extra_dw;  	/* ring read/write ptr handling */  	u64 (*get_rptr)(struct amdgpu_ring *ring); @@ -119,6 +123,7 @@ struct amdgpu_ring_funcs {  	void (*set_wptr)(struct amdgpu_ring *ring);  	/* validating and patching of IBs */  	int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); +	int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);  	/* constants to calculate how many DW are needed for an emit */  	unsigned emit_frame_size;  	unsigned emit_ib_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index e3878256743a..8904e62dca7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT  /*   * Copyright 2009 VMware, Inc.   
* @@ -75,11 +76,12 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)  	r = amdgpu_bo_reserve(vram_obj, false);  	if (unlikely(r != 0))  		goto out_unref; -	r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr); +	r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM);  	if (r) {  		DRM_ERROR("Failed to pin VRAM object\n");  		goto out_unres;  	} +	vram_addr = amdgpu_bo_gpu_offset(vram_obj);  	for (i = 0; i < n; i++) {  		void *gtt_map, *vram_map;  		void **gart_start, **gart_end; @@ -96,11 +98,17 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)  		r = amdgpu_bo_reserve(gtt_obj[i], false);  		if (unlikely(r != 0))  			goto out_lclean_unref; -		r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr); +		r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT);  		if (r) {  			DRM_ERROR("Failed to pin GTT object %d\n", i);  			goto out_lclean_unres;  		} +		r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo); +		if (r) { +			DRM_ERROR("%p bind failed\n", gtt_obj[i]); +			goto out_lclean_unpin; +		} +		gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]);  		r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);  		if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index e96e26d3f3b0..7206a0025b17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,  	    TP_fast_assign(  			   __entry->bo_list = p->bo_list; -			   __entry->ring = p->job->ring->idx; +			   __entry->ring = p->ring->idx;  			   __entry->dw = p->job->ibs[i].length_dw;  			   __entry->fences = amdgpu_fence_count_emitted( -				p->job->ring); +				p->ring);  			   ),  	    TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",  		      __entry->bo_list, __entry->ring, __entry->dw, @@ -178,7 +178,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,  			   __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))  			   __entry->context = job->base.s_fence->finished.context;  			   __entry->seqno = job->base.s_fence->finished.seqno; -			   __entry->ring_name = job->ring->name; +			   __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;  			   __entry->num_ibs = job->num_ibs;  			   ),  	    TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -203,7 +203,7 @@ TRACE_EVENT(amdgpu_sched_run_job,  			   __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))  			   __entry->context = job->base.s_fence->finished.context;  			   __entry->seqno = job->base.s_fence->finished.seqno; -			   __entry->ring_name = job->ring->name; +			   __entry->ring_name = to_amdgpu_ring(job->base.sched)->name;  			   __entry->num_ibs = job->num_ibs;  			   ),  	    TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u", @@ -314,6 +314,11 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping,  	    TP_ARGS(mapping)  ); +DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs, +	    TP_PROTO(struct amdgpu_bo_va_mapping *mapping), +	    TP_ARGS(mapping) +); +  TRACE_EVENT(amdgpu_vm_set_ptes,  	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,  		     uint32_t incr, uint64_t flags), @@ -436,7 +441,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,  			__entry->total_bo, __entry->total_size)  ); -TRACE_EVENT(amdgpu_ttm_bo_move, +TRACE_EVENT(amdgpu_bo_move,  	    TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),  	    TP_ARGS(bo, new_placement, old_placement),  	    TP_STRUCT__entry( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e93a0a237dc3..fcf421263fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -92,11 +92,9 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)  }  /** - * amdgpu_ttm_global_init - Initialize global TTM memory reference - * 							structures. + * amdgpu_ttm_global_init - Initialize global TTM memory reference structures.   * - * @adev:  	AMDGPU device for which the global structures need to be - *			registered. + * @adev: AMDGPU device for which the global structures need to be registered.   *   * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()   * during bring up. @@ -104,8 +102,6 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)  static int amdgpu_ttm_global_init(struct amdgpu_device *adev)  {  	struct drm_global_reference *global_ref; -	struct amdgpu_ring *ring; -	struct drm_sched_rq *rq;  	int r;  	/* ensure reference is false in case init fails */ @@ -138,21 +134,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)  	mutex_init(&adev->mman.gtt_window_lock); -	ring = adev->mman.buffer_funcs_ring; -	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; -	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity, -				  rq, NULL); -	if (r) { -		DRM_ERROR("Failed setting up TTM BO move run queue.\n"); -		goto error_entity; -	} -  	adev->mman.mem_global_referenced = true;  	return 0; -error_entity: -	drm_global_item_unref(&adev->mman.bo_global_ref.ref);  error_bo:  	drm_global_item_unref(&adev->mman.mem_global_ref);  error_mem: @@ -162,8 +147,6 @@ error_mem:  static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)  {  	if (adev->mman.mem_global_referenced) { -		drm_sched_entity_fini(adev->mman.entity.sched, -				      &adev->mman.entity);  		mutex_destroy(&adev->mman.gtt_window_lock);  		drm_global_item_unref(&adev->mman.bo_global_ref.ref);  		drm_global_item_unref(&adev->mman.mem_global_ref); @@ -177,13 +160,12 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)  }  /** - * amdgpu_init_mem_type - 	Initialize a memory manager for a specific - * 							type of memory request. + * amdgpu_init_mem_type - Initialize a memory manager for a specific type of + * memory request.   * - * @bdev:	The TTM BO device object (contains a reference to - * 			amdgpu_device) - * @type:	The type of memory requested - * @man: + * @bdev: The TTM BO device object (contains a reference to amdgpu_device) + * @type: The type of memory requested + * @man: The memory type manager for each domain   *   * This is called by ttm_bo_init_mm() when a buffer object is being   * initialized. 
@@ -263,7 +245,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,  	}  	/* Object isn't an AMDGPU object so ignore */ -	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { +	if (!amdgpu_bo_is_amdgpu_bo(bo)) {  		placement->placement = &placements;  		placement->busy_placement = &placements;  		placement->num_placement = 1; @@ -276,8 +258,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,  	case TTM_PL_VRAM:  		if (!adev->mman.buffer_funcs_enabled) {  			/* Move to system memory */ -			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); -		} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && +			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); +		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&  			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&  			   amdgpu_bo_in_cpu_visible_vram(abo)) { @@ -286,7 +268,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,  			 * BO will be evicted to GTT rather than causing other  			 * BOs to be evicted from VRAM  			 */ -			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM | +			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |  							 AMDGPU_GEM_DOMAIN_GTT);  			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;  			abo->placements[0].lpfn = 0; @@ -294,12 +276,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,  			abo->placement.num_busy_placement = 1;  		} else {  			/* Move to GTT memory */ -			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT); +			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);  		}  		break;  	case TTM_PL_TT:  	default: -		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); +		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);  	}  	*placement = abo->placement;  } @@ -307,8 +289,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,  /**   * amdgpu_verify_access - Verify access for a mmap call   * - * @bo:		The buffer object to map - * @filp:	The file pointer from the process performing the mmap + * @bo:	The buffer object to map + * @filp: The file pointer from the process performing the mmap   *   * This is called by ttm_bo_mmap() to verify whether a process   * has the right to mmap a BO to their process space. @@ -333,11 +315,10 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)  /**   * amdgpu_move_null - Register memory for a buffer object   * - * @bo:			The bo to assign the memory to - * @new_mem:	The memory to be assigned. + * @bo: The bo to assign the memory to + * @new_mem: The memory to be assigned.   * - * Assign the memory from new_mem to the memory of the buffer object - * bo. + * Assign the memory from new_mem to the memory of the buffer object bo.   */  static void amdgpu_move_null(struct ttm_buffer_object *bo,  			     struct ttm_mem_reg *new_mem) @@ -350,8 +331,12 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,  }  /** - * amdgpu_mm_node_addr -	Compute the GPU relative offset of a GTT - * 							buffer. + * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT buffer. + * + * @bo: The bo to assign the memory to. + * @mm_node: Memory manager node for drm allocator. + * @mem: The region where the bo resides. 
+ *   */  static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,  				    struct drm_mm_node *mm_node, @@ -367,10 +352,12 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,  }  /** - * amdgpu_find_mm_node -	Helper function finds the drm_mm_node - *  						corresponding to @offset. It also modifies - * 							the offset to be within the drm_mm_node - * 							returned + * amdgpu_find_mm_node - Helper function finds the drm_mm_node corresponding to + * @offset. It also modifies the offset to be within the drm_mm_node returned + * + * @mem: The region where the bo resides. + * @offset: The offset that drm_mm_node is used for finding. + *   */  static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,  					       unsigned long *offset) @@ -512,8 +499,8 @@ error:  /**   * amdgpu_move_blit - Copy an entire buffer to another buffer   * - * This is a helper called by amdgpu_bo_move() and - * amdgpu_move_vram_ram() to help move buffers to and from VRAM. + * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to + * help move buffers to and from VRAM.   */  static int amdgpu_move_blit(struct ttm_buffer_object *bo,  			    bool evict, bool no_wait_gpu, @@ -595,7 +582,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,  	}  	/* blit VRAM to GTT */ -	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem); +	r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem);  	if (unlikely(r)) {  		goto out_cleanup;  	} @@ -647,7 +634,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,  	}  	/* copy to VRAM */ -	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem); +	r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem);  	if (unlikely(r)) {  		goto out_cleanup;  	} @@ -809,8 +796,8 @@ struct amdgpu_ttm_tt {  };  /** - * amdgpu_ttm_tt_get_user_pages - 	Pin pages of memory pointed to - * 									by a USERPTR pointer to memory + * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR + * pointer to memory   *   * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().   * This provides a wrapper around the get_user_pages() call to provide @@ -833,8 +820,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)  	down_read(&mm->mmap_sem);  	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { -		/* check that we only use anonymous memory -		   to prevent problems with writeback */ +		/* +		 * check that we only use anonymous memory to prevent problems +		 * with writeback +		 */  		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;  		struct vm_area_struct *vma; @@ -885,10 +874,9 @@ release_pages:  }  /** - * amdgpu_ttm_tt_set_user_pages - 	Copy pages in, putting old pages - * 									as necessary. + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.   * - * Called by amdgpu_cs_list_validate().  This creates the page list + * Called by amdgpu_cs_list_validate(). This creates the page list   * that backs user memory and will ultimately be mapped into the device   * address space.   
*/ @@ -930,8 +918,7 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)  }  /** - * amdgpu_ttm_tt_pin_userptr - 	prepare the sg table with the - * 								user pages + * amdgpu_ttm_tt_pin_userptr - 	prepare the sg table with the user pages   *   * Called by amdgpu_ttm_backend_bind()   **/ @@ -1310,8 +1297,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)  }  /** - * amdgpu_ttm_tt_set_userptr -	Initialize userptr GTT ttm_tt - * 								for the current task + * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current + * task   *   * @ttm: The ttm_tt object to bind this userptr object to   * @addr:  The address in the current tasks VM space to use @@ -1361,9 +1348,8 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)  }  /** - * amdgpu_ttm_tt_affect_userptr -	Determine if a ttm_tt object lays - * 									inside an address range for the - * 									current task. + * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an + * address range for the current task.   *   */  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, @@ -1401,8 +1387,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,  }  /** - * amdgpu_ttm_tt_userptr_invalidated -	Has the ttm_tt object been - * 										invalidated? + * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?   */  bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,  				       int *last_invalidated) @@ -1415,10 +1400,8 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,  }  /** - * amdgpu_ttm_tt_userptr_needs_pages -	Have the pages backing this - * 										ttm_tt object been invalidated - * 										since the last time they've - * 										been set? + * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object + * been invalidated since the last time they've been set?   */  bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)  { @@ -1474,13 +1457,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,  }  /** - * amdgpu_ttm_bo_eviction_valuable -	Check to see if we can evict - * 										a buffer object. + * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer + * object.   * - * Return true if eviction is sensible.  Called by - * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space() - * which tries to evict buffer objects until it can find space - * for a new object and by ttm_bo_force_list_clean() which is + * Return true if eviction is sensible. Called by ttm_mem_evict_first() on + * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until + * it can find space for a new object and by ttm_bo_force_list_clean() which is   * used to clean out a memory space.   */  static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, @@ -1530,8 +1512,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,  }  /** - * amdgpu_ttm_access_memory -	Read or Write memory that backs a - * 								buffer object. + * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.   
*   * @bo:  The buffer object to read/write   * @offset:  Offset into buffer object @@ -1695,7 +1676,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)  			AMDGPU_GEM_DOMAIN_VRAM,  			adev->fw_vram_usage.start_offset,  			(adev->fw_vram_usage.start_offset + -			adev->fw_vram_usage.size), NULL); +			adev->fw_vram_usage.size));  		if (r)  			goto error_pin;  		r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, @@ -1719,8 +1700,8 @@ error_create:  	return r;  }  /** - * amdgpu_ttm_init -	Init the memory management (ttm) as well as - * 						various gtt/vram related fields. + * amdgpu_ttm_init - Init the memory management (ttm) as well as various + * gtt/vram related fields.   *   * This initializes all of the memory space pools that the TTM layer   * will need such as the GTT space (system memory mapped to the device), @@ -1871,8 +1852,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  }  /** - * amdgpu_ttm_late_init -	Handle any late initialization for - * 							amdgpu_ttm + * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm   */  void amdgpu_ttm_late_init(struct amdgpu_device *adev)  { @@ -1921,10 +1901,30 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)  {  	struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];  	uint64_t size; +	int r; -	if (!adev->mman.initialized || adev->in_gpu_reset) +	if (!adev->mman.initialized || adev->in_gpu_reset || +	    adev->mman.buffer_funcs_enabled == enable)  		return; +	if (enable) { +		struct amdgpu_ring *ring; +		struct drm_sched_rq *rq; + +		ring = adev->mman.buffer_funcs_ring; +		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; +		r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL); +		if (r) { +			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n", +				  r); +			return; +		} +	} else { +		drm_sched_entity_destroy(&adev->mman.entity); +		dma_fence_put(man->move); +		man->move = NULL; +	} +  	/* this just adjusts TTM size idea, which sets lpfn to the correct value */  	if (enable)  		size = adev->gmc.real_vram_size; @@ -2002,7 +2002,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,  	if (r)  		goto error_free; -	r = amdgpu_job_submit(job, ring, &adev->mman.entity, +	r = amdgpu_job_submit(job, &adev->mman.entity,  			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);  	if (r)  		goto error_free; @@ -2071,24 +2071,19 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,  	amdgpu_ring_pad_ib(ring, &job->ibs[0]);  	WARN_ON(job->ibs[0].length_dw > num_dw); -	if (direct_submit) { -		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, -				       NULL, fence); -		job->fence = dma_fence_get(*fence); -		if (r) -			DRM_ERROR("Error scheduling IBs (%d)\n", r); -		amdgpu_job_free(job); -	} else { -		r = amdgpu_job_submit(job, ring, &adev->mman.entity, +	if (direct_submit) +		r = amdgpu_job_submit_direct(job, ring, fence); +	else +		r = amdgpu_job_submit(job, &adev->mman.entity,  				      AMDGPU_FENCE_OWNER_UNDEFINED, fence); -		if (r) -			goto error_free; -	} +	if (r) +		goto error_free;  	return r;  error_free:  	amdgpu_job_free(job); +	DRM_ERROR("Error scheduling IBs (%d)\n", r);  	return r;  } @@ -2171,7 +2166,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,  	amdgpu_ring_pad_ib(ring, &job->ibs[0]);  	WARN_ON(job->ibs[0].length_dw > num_dw); -	r = amdgpu_job_submit(job, ring, &adev->mman.entity, +	r = amdgpu_job_submit(job, &adev->mman.entity,  			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);  	if (r)  		goto 
error_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e5da4654b630..8b3cc6687769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -73,7 +73,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem);  uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);  int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man); -u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo); +u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);  uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);  uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 08e38579af24..bdc472b6e641 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -194,6 +194,7 @@ enum AMDGPU_UCODE_ID {  	AMDGPU_UCODE_ID_SMC,  	AMDGPU_UCODE_ID_UVD,  	AMDGPU_UCODE_ID_VCE, +	AMDGPU_UCODE_ID_VCN,  	AMDGPU_UCODE_ID_MAXIMUM,  }; @@ -226,6 +227,9 @@ struct amdgpu_firmware_info {  	void *kaddr;  	/* ucode_size_bytes */  	uint32_t ucode_size; +	/* starting tmr mc address */ +	uint32_t tmr_mc_addr_lo; +	uint32_t tmr_mc_addr_hi;  };  void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 3ff08e326838..e5a6db6beab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -53,11 +53,11 @@  /* Firmware Names */  #ifdef CONFIG_DRM_AMDGPU_CIK -#define FIRMWARE_BONAIRE	"radeon/bonaire_uvd.bin" -#define FIRMWARE_KABINI	"radeon/kabini_uvd.bin" -#define FIRMWARE_KAVERI	"radeon/kaveri_uvd.bin" -#define FIRMWARE_HAWAII	"radeon/hawaii_uvd.bin" -#define FIRMWARE_MULLINS	"radeon/mullins_uvd.bin" +#define FIRMWARE_BONAIRE	"amdgpu/bonaire_uvd.bin" +#define FIRMWARE_KABINI	"amdgpu/kabini_uvd.bin" +#define FIRMWARE_KAVERI	"amdgpu/kaveri_uvd.bin" +#define FIRMWARE_HAWAII	"amdgpu/hawaii_uvd.bin" +#define FIRMWARE_MULLINS	"amdgpu/mullins_uvd.bin"  #endif  #define FIRMWARE_TONGA		"amdgpu/tonga_uvd.bin"  #define FIRMWARE_CARRIZO	"amdgpu/carrizo_uvd.bin" @@ -122,12 +122,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);  int amdgpu_uvd_sw_init(struct amdgpu_device *adev)  { -	struct amdgpu_ring *ring; -	struct drm_sched_rq *rq;  	unsigned long bo_size;  	const char *fw_name;  	const struct common_firmware_header *hdr; -	unsigned version_major, version_minor, family_id; +	unsigned family_id;  	int i, j, r;  	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); @@ -208,29 +206,46 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)  	hdr = (const struct common_firmware_header *)adev->uvd.fw->data;  	family_id = le32_to_cpu(hdr->ucode_version) & 0xff; -	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; -	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; -	DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", -		version_major, version_minor, family_id); - -	/* -	 * Limit the number of UVD handles depending on microcode major -	 * and minor versions. The firmware version which has 40 UVD -	 * instances support is 1.80. So all subsequent versions should -	 * also have the same support. 
-	 */ -	if ((version_major > 0x01) || -	    ((version_major == 0x01) && (version_minor >= 0x50))) -		adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; -	adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | -				(family_id << 8)); +	if (adev->asic_type < CHIP_VEGA20) { +		unsigned version_major, version_minor; -	if ((adev->asic_type == CHIP_POLARIS10 || -	     adev->asic_type == CHIP_POLARIS11) && -	    (adev->uvd.fw_version < FW_1_66_16)) -		DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", -			  version_major, version_minor); +		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; +		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; +		DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", +			version_major, version_minor, family_id); + +		/* +		 * Limit the number of UVD handles depending on microcode major +		 * and minor versions. The firmware version which has 40 UVD +		 * instances support is 1.80. So all subsequent versions should +		 * also have the same support. +		 */ +		if ((version_major > 0x01) || +		    ((version_major == 0x01) && (version_minor >= 0x50))) +			adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + +		adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | +					(family_id << 8)); + +		if ((adev->asic_type == CHIP_POLARIS10 || +		     adev->asic_type == CHIP_POLARIS11) && +		    (adev->uvd.fw_version < FW_1_66_16)) +			DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", +				  version_major, version_minor); +	} else { +		unsigned int enc_major, enc_minor, dec_minor; + +		dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; +		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f; +		enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3; +		DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n", +			enc_major, enc_minor, dec_minor, family_id); + +		adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + +		adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version); +	}  	bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE  		  +  AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; @@ -238,7 +253,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)  		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);  	for (j = 0; j < adev->uvd.num_uvd_inst; j++) { - +		if (adev->uvd.harvest_config & (1 << j)) +			continue;  		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,  					    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,  					    &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); @@ -246,21 +262,13 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)  			dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);  			return r;  		} +	} -		ring = &adev->uvd.inst[j].ring; -		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; -		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity, -					  rq, NULL); -		if (r != 0) { -			DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j); -			return r; -		} - -		for (i = 0; i < adev->uvd.max_handles; ++i) { -			atomic_set(&adev->uvd.inst[j].handles[i], 0); -			adev->uvd.inst[j].filp[i] = NULL; -		} +	for (i = 0; i < adev->uvd.max_handles; ++i) { +		atomic_set(&adev->uvd.handles[i], 0); +		adev->uvd.filp[i] = NULL;  	} +  	/* from uvd v5.0 HW addressing capacity increased to 64 bits */  	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))  		adev->uvd.address_64_bit = true; @@ -289,10 +297,12 @@ int amdgpu_uvd_sw_fini(struct 
amdgpu_device *adev)  {  	int i, j; -	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { -		kfree(adev->uvd.inst[j].saved_bo); +	drm_sched_entity_destroy(&adev->uvd.entity); -		drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity); +	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { +		if (adev->uvd.harvest_config & (1 << j)) +			continue; +		kvfree(adev->uvd.inst[j].saved_bo);  		amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,  				      &adev->uvd.inst[j].gpu_addr, @@ -308,6 +318,29 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)  	return 0;  } +/** + * amdgpu_uvd_entity_init - init entity + * + * @adev: amdgpu_device pointer + * + */ +int amdgpu_uvd_entity_init(struct amdgpu_device *adev) +{ +	struct amdgpu_ring *ring; +	struct drm_sched_rq *rq; +	int r; + +	ring = &adev->uvd.inst[0].ring; +	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; +	r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL); +	if (r) { +		DRM_ERROR("Failed setting up UVD kernel entity.\n"); +		return r; +	} + +	return 0; +} +  int amdgpu_uvd_suspend(struct amdgpu_device *adev)  {  	unsigned size; @@ -316,24 +349,26 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)  	cancel_delayed_work_sync(&adev->uvd.idle_work); +	/* only valid for physical mode */ +	if (adev->asic_type < CHIP_POLARIS10) { +		for (i = 0; i < adev->uvd.max_handles; ++i) +			if (atomic_read(&adev->uvd.handles[i])) +				break; + +		if (i == adev->uvd.max_handles) +			return 0; +	} +  	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { +		if (adev->uvd.harvest_config & (1 << j)) +			continue;  		if (adev->uvd.inst[j].vcpu_bo == NULL)  			continue; -		/* only valid for physical mode */ -		if (adev->asic_type < CHIP_POLARIS10) { -			for (i = 0; i < adev->uvd.max_handles; ++i) -				if (atomic_read(&adev->uvd.inst[j].handles[i])) -					break; - -			if (i == adev->uvd.max_handles) -				continue; -		} -  		size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);  		ptr = adev->uvd.inst[j].cpu_addr; -		adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL); +		adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);  		if (!adev->uvd.inst[j].saved_bo)  			return -ENOMEM; @@ -349,6 +384,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)  	int i;  	for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		if (adev->uvd.inst[i].vcpu_bo == NULL)  			return -EINVAL; @@ -357,7 +394,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)  		if (adev->uvd.inst[i].saved_bo != NULL) {  			memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); -			kfree(adev->uvd.inst[i].saved_bo); +			kvfree(adev->uvd.inst[i].saved_bo);  			adev->uvd.inst[i].saved_bo = NULL;  		} else {  			const struct common_firmware_header *hdr; @@ -381,30 +418,27 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)  void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)  { -	struct amdgpu_ring *ring; -	int i, j, r; - -	for (j = 0; j < adev->uvd.num_uvd_inst; j++) { -		ring = &adev->uvd.inst[j].ring; +	struct amdgpu_ring *ring = &adev->uvd.inst[0].ring; +	int i, r; -		for (i = 0; i < adev->uvd.max_handles; ++i) { -			uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]); -			if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) { -				struct dma_fence *fence; - -				r = amdgpu_uvd_get_destroy_msg(ring, handle, -							       false, &fence); -				if (r) { -					DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r); -					continue; -				} +	for (i = 0; i < adev->uvd.max_handles; ++i) { +		
uint32_t handle = atomic_read(&adev->uvd.handles[i]); -				dma_fence_wait(fence, false); -				dma_fence_put(fence); +		if (handle != 0 && adev->uvd.filp[i] == filp) { +			struct dma_fence *fence; -				adev->uvd.inst[j].filp[i] = NULL; -				atomic_set(&adev->uvd.inst[j].handles[i], 0); +			r = amdgpu_uvd_get_destroy_msg(ring, handle, false, +						       &fence); +			if (r) { +				DRM_ERROR("Error destroying UVD %d!\n", r); +				continue;  			} + +			dma_fence_wait(fence, false); +			dma_fence_put(fence); + +			adev->uvd.filp[i] = NULL; +			atomic_set(&adev->uvd.handles[i], 0);  		}  	}  } @@ -459,7 +493,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)  		if (cmd == 0x0 || cmd == 0x3) {  			/* yes, force it into VRAM */  			uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; -			amdgpu_ttm_placement_from_domain(bo, domain); +			amdgpu_bo_placement_from_domain(bo, domain);  		}  		amdgpu_uvd_force_into_uvd_segment(bo); @@ -679,16 +713,15 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,  	void *ptr;  	long r;  	int i; -	uint32_t ip_instance = ctx->parser->job->ring->me;  	if (offset & 0x3F) { -		DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance); +		DRM_ERROR("UVD messages must be 64 byte aligned!\n");  		return -EINVAL;  	}  	r = amdgpu_bo_kmap(bo, &ptr);  	if (r) { -		DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r); +		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);  		return r;  	} @@ -698,7 +731,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,  	handle = msg[2];  	if (handle == 0) { -		DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance); +		DRM_ERROR("Invalid UVD handle!\n");  		return -EINVAL;  	} @@ -709,18 +742,19 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,  		/* try to alloc a new handle */  		for (i = 0; i < adev->uvd.max_handles; ++i) { -			if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { -				DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle); +			if (atomic_read(&adev->uvd.handles[i]) == handle) { +				DRM_ERROR("Handle 0x%x already in use!\n", +					  handle);  				return -EINVAL;  			} -			if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) { -				adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp; +			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) { +				adev->uvd.filp[i] = ctx->parser->filp;  				return 0;  			}  		} -		DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance); +		DRM_ERROR("No more free UVD handles!\n");  		return -ENOSPC;  	case 1: @@ -732,27 +766,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,  		/* validate the handle */  		for (i = 0; i < adev->uvd.max_handles; ++i) { -			if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { -				if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) { -					DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance); +			if (atomic_read(&adev->uvd.handles[i]) == handle) { +				if (adev->uvd.filp[i] != ctx->parser->filp) { +					DRM_ERROR("UVD handle collision detected!\n");  					return -EINVAL;  				}  				return 0;  			}  		} -		DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle); +		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);  		return -ENOENT;  	case 2:  		/* it's a destroy msg, free the handle */  		for (i = 0; i < adev->uvd.max_handles; ++i) -			atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0); +			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
amdgpu_bo_kunmap(bo);  		return 0;  	default: -		DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type); +		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);  		return -EINVAL;  	}  	BUG(); @@ -1000,7 +1034,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,  	if (!ring->adev->uvd.address_64_bit) {  		struct ttm_operation_ctx ctx = { true, false }; -		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); +		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);  		amdgpu_uvd_force_into_uvd_segment(bo);  		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);  		if (r) @@ -1045,19 +1079,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,  		if (r < 0)  			goto err_free; -		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -		job->fence = dma_fence_get(f); +		r = amdgpu_job_submit_direct(job, ring, &f);  		if (r)  			goto err_free; - -		amdgpu_job_free(job);  	} else {  		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,  				     AMDGPU_FENCE_OWNER_UNDEFINED, false);  		if (r)  			goto err_free; -		r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity, +		r = amdgpu_job_submit(job, &adev->uvd.entity,  				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);  		if (r)  			goto err_free; @@ -1149,6 +1180,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)  	unsigned fences = 0, i, j;  	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);  		for (j = 0; j < adev->uvd.num_enc_rings; ++j) {  			fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]); @@ -1259,7 +1292,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)  		 * necessarily linear. So we need to count  		 * all non-zero handles.  		 
*/ -		if (atomic_read(&adev->uvd.inst->handles[i])) +		if (atomic_read(&adev->uvd.handles[i]))  			used_handles++;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 8b23a1b00c76..a3ab1a41060f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -42,30 +42,34 @@ struct amdgpu_uvd_inst {  	void			*cpu_addr;  	uint64_t		gpu_addr;  	void			*saved_bo; -	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES]; -	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];  	struct amdgpu_ring	ring;  	struct amdgpu_ring	ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];  	struct amdgpu_irq_src	irq; -	struct drm_sched_entity entity; -	struct drm_sched_entity entity_enc;  	uint32_t                srbm_soft_reset;  }; +#define AMDGPU_UVD_HARVEST_UVD0 (1 << 0) +#define AMDGPU_UVD_HARVEST_UVD1 (1 << 1) +  struct amdgpu_uvd {  	const struct firmware	*fw;	/* UVD firmware */  	unsigned		fw_version;  	unsigned		max_handles;  	unsigned		num_enc_rings; -	uint8_t		num_uvd_inst; +	uint8_t			num_uvd_inst;  	bool			address_64_bit;  	bool			use_ctx_buf; -	struct amdgpu_uvd_inst		inst[AMDGPU_MAX_UVD_INSTANCES]; +	struct amdgpu_uvd_inst	inst[AMDGPU_MAX_UVD_INSTANCES]; +	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES]; +	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES]; +	struct drm_sched_entity entity;  	struct delayed_work	idle_work; +	unsigned		harvest_config;  };  int amdgpu_uvd_sw_init(struct amdgpu_device *adev);  int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); +int amdgpu_uvd_entity_init(struct amdgpu_device *adev);  int amdgpu_uvd_suspend(struct amdgpu_device *adev);  int amdgpu_uvd_resume(struct amdgpu_device *adev);  int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 23d960ec1cf2..0cc5190f4f36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -40,11 +40,11 @@  /* Firmware Names */  #ifdef CONFIG_DRM_AMDGPU_CIK -#define FIRMWARE_BONAIRE	"radeon/bonaire_vce.bin" -#define FIRMWARE_KABINI	"radeon/kabini_vce.bin" -#define FIRMWARE_KAVERI	"radeon/kaveri_vce.bin" -#define FIRMWARE_HAWAII	"radeon/hawaii_vce.bin" -#define FIRMWARE_MULLINS	"radeon/mullins_vce.bin" +#define FIRMWARE_BONAIRE	"amdgpu/bonaire_vce.bin" +#define FIRMWARE_KABINI	"amdgpu/kabini_vce.bin" +#define FIRMWARE_KAVERI	"amdgpu/kaveri_vce.bin" +#define FIRMWARE_HAWAII	"amdgpu/hawaii_vce.bin" +#define FIRMWARE_MULLINS	"amdgpu/mullins_vce.bin"  #endif  #define FIRMWARE_TONGA		"amdgpu/tonga_vce.bin"  #define FIRMWARE_CARRIZO	"amdgpu/carrizo_vce.bin" @@ -90,8 +90,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work);   */  int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)  { -	struct amdgpu_ring *ring; -	struct drm_sched_rq *rq;  	const char *fw_name;  	const struct common_firmware_header *hdr;  	unsigned ucode_version, version_major, version_minor, binary_id; @@ -188,15 +186,6 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)  		return r;  	} -	ring = &adev->vce.ring[0]; -	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; -	r = drm_sched_entity_init(&ring->sched, &adev->vce.entity, -				  rq, NULL); -	if (r != 0) { -		DRM_ERROR("Failed setting up VCE run queue.\n"); -		return r; -	} -  	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {  		atomic_set(&adev->vce.handles[i], 0);  		adev->vce.filp[i] = NULL; @@ -222,7 +211,7 @@ int amdgpu_vce_sw_fini(struct 
amdgpu_device *adev)  	if (adev->vce.vcpu_bo == NULL)  		return 0; -	drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); +	drm_sched_entity_destroy(&adev->vce.entity);  	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,  		(void **)&adev->vce.cpu_addr); @@ -237,6 +226,29 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)  }  /** + * amdgpu_vce_entity_init - init entity + * + * @adev: amdgpu_device pointer + * + */ +int amdgpu_vce_entity_init(struct amdgpu_device *adev) +{ +	struct amdgpu_ring *ring; +	struct drm_sched_rq *rq; +	int r; + +	ring = &adev->vce.ring[0]; +	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; +	r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL); +	if (r != 0) { +		DRM_ERROR("Failed setting up VCE run queue.\n"); +		return r; +	} + +	return 0; +} + +/**   * amdgpu_vce_suspend - unpin VCE fw memory   *   * @adev: amdgpu_device pointer @@ -470,12 +482,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -	job->fence = dma_fence_get(f); +	r = amdgpu_job_submit_direct(job, ring, &f);  	if (r)  		goto err; -	amdgpu_job_free(job);  	if (fence)  		*fence = dma_fence_get(f);  	dma_fence_put(f); @@ -532,19 +542,13 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	if (direct) { -		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -		job->fence = dma_fence_get(f); -		if (r) -			goto err; - -		amdgpu_job_free(job); -	} else { -		r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, +	if (direct) +		r = amdgpu_job_submit_direct(job, ring, &f); +	else +		r = amdgpu_job_submit(job, &ring->adev->vce.entity,  				      AMDGPU_FENCE_OWNER_UNDEFINED, &f); -		if (r) -			goto err; -	} +	if (r) +		goto err;  	if (fence)  		*fence = dma_fence_get(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 71781267ee4c..a1f209eed4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -55,6 +55,7 @@ struct amdgpu_vce {  int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);  int amdgpu_vce_sw_fini(struct amdgpu_device *adev); +int amdgpu_vce_entity_init(struct amdgpu_device *adev);  int amdgpu_vce_suspend(struct amdgpu_device *adev);  int amdgpu_vce_resume(struct amdgpu_device *adev);  int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 1b4ad9b2a755..fd654a4406db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -111,9 +111,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)  			version_major, version_minor, family_id);  	} -	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) -		  +  AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE +	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE  		  +  AMDGPU_VCN_SESSION_SIZE * 40; +	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) +		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);  	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,  				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,  				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); @@ -129,7 +130,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)  {  	int i; -	
kfree(adev->vcn.saved_bo); +	kvfree(adev->vcn.saved_bo);  	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,  			      &adev->vcn.gpu_addr, @@ -140,6 +141,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)  	for (i = 0; i < adev->vcn.num_enc_rings; ++i)  		amdgpu_ring_fini(&adev->vcn.ring_enc[i]); +	amdgpu_ring_fini(&adev->vcn.ring_jpeg); +  	release_firmware(adev->vcn.fw);  	return 0; @@ -158,7 +161,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)  	size = amdgpu_bo_size(adev->vcn.vcpu_bo);  	ptr = adev->vcn.cpu_addr; -	adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL); +	adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);  	if (!adev->vcn.saved_bo)  		return -ENOMEM; @@ -180,18 +183,20 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)  	if (adev->vcn.saved_bo != NULL) {  		memcpy_toio(ptr, adev->vcn.saved_bo, size); -		kfree(adev->vcn.saved_bo); +		kvfree(adev->vcn.saved_bo);  		adev->vcn.saved_bo = NULL;  	} else {  		const struct common_firmware_header *hdr;  		unsigned offset;  		hdr = (const struct common_firmware_header *)adev->vcn.fw->data; -		offset = le32_to_cpu(hdr->ucode_array_offset_bytes); -		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, -			    le32_to_cpu(hdr->ucode_size_bytes)); -		size -= le32_to_cpu(hdr->ucode_size_bytes); -		ptr += le32_to_cpu(hdr->ucode_size_bytes); +		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { +			offset = le32_to_cpu(hdr->ucode_array_offset_bytes); +			memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, +				    le32_to_cpu(hdr->ucode_size_bytes)); +			size -= le32_to_cpu(hdr->ucode_size_bytes); +			ptr += le32_to_cpu(hdr->ucode_size_bytes); +		}  		memset_io(ptr, 0, size);  	} @@ -209,6 +214,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)  		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);  	} +	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); +  	if (fences == 0) {  		if (adev->pm.dpm_enabled)  			amdgpu_dpm_enable_uvd(adev, false); @@ -225,7 +232,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)  	struct amdgpu_device *adev = ring->adev;  	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); -	if (set_clocks && adev->pm.dpm_enabled) { +	if (set_clocks) {  		if (adev->pm.dpm_enabled)  			amdgpu_dpm_enable_uvd(adev, true);  		else @@ -304,13 +311,10 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,  	}  	ib->length_dw = 16; -	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -	job->fence = dma_fence_get(f); +	r = amdgpu_job_submit_direct(job, ring, &f);  	if (r)  		goto err_free; -	amdgpu_job_free(job); -  	amdgpu_bo_fence(bo, f, false);  	amdgpu_bo_unreserve(bo);  	amdgpu_bo_unref(&bo); @@ -495,12 +499,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -	job->fence = dma_fence_get(f); +	r = amdgpu_job_submit_direct(job, ring, &f);  	if (r)  		goto err; -	amdgpu_job_free(job);  	if (fence)  		*fence = dma_fence_get(f);  	dma_fence_put(f); @@ -549,12 +551,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -	job->fence = dma_fence_get(f); +	r = amdgpu_job_submit_direct(job, ring, &f);  	if (r)  		goto err; -	amdgpu_job_free(job);  	if (fence)  		*fence = dma_fence_get(f);  	dma_fence_put(f); @@ -597,3 +597,127 @@ error:  	
dma_fence_put(fence);  	return r;  } + +int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	uint32_t tmp = 0; +	unsigned i; +	int r; + +	WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); +	r = amdgpu_ring_alloc(ring, 3); + +	if (r) { +		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", +				  ring->idx, r); +		return r; +	} + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0)); +	amdgpu_ring_write(ring, 0xDEADBEEF); +	amdgpu_ring_commit(ring); + +	for (i = 0; i < adev->usec_timeout; i++) { +		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); +		if (tmp == 0xDEADBEEF) +			break; +		DRM_UDELAY(1); +	} + +	if (i < adev->usec_timeout) { +		DRM_DEBUG("ring test on %d succeeded in %d usecs\n", +				  ring->idx, i); +	} else { +		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", +				  ring->idx, tmp); +		r = -EINVAL; +	} + +	return r; +} + +static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, +		struct dma_fence **fence) +{ +	struct amdgpu_device *adev = ring->adev; +	struct amdgpu_job *job; +	struct amdgpu_ib *ib; +	struct dma_fence *f = NULL; +	const unsigned ib_size_dw = 16; +	int i, r; + +	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); +	if (r) +		return r; + +	ib = &job->ibs[0]; + +	ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0); +	ib->ptr[1] = 0xDEADBEEF; +	for (i = 2; i < 16; i += 2) { +		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); +		ib->ptr[i+1] = 0; +	} +	ib->length_dw = 16; + +	r = amdgpu_job_submit_direct(job, ring, &f); +	if (r) +		goto err; + +	if (fence) +		*fence = dma_fence_get(f); +	dma_fence_put(f); + +	return 0; + +err: +	amdgpu_job_free(job); +	return r; +} + +int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ +	struct amdgpu_device *adev = ring->adev; +	uint32_t tmp = 0; +	unsigned i; +	struct dma_fence *fence = NULL; +	long r = 0; + +	r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); +	if (r) { +		DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r); +		goto error; +	} + +	r = dma_fence_wait_timeout(fence, false, timeout); +	if (r == 0) { +		DRM_ERROR("amdgpu: IB test timed out.\n"); +		r = -ETIMEDOUT; +		goto error; +	} else if (r < 0) { +		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); +		goto error; +	} else +		r = 0; + +	for (i = 0; i < adev->usec_timeout; i++) { +		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH)); +		if (tmp == 0xDEADBEEF) +			break; +		DRM_UDELAY(1); +	} + +	if (i < adev->usec_timeout) +		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); +	else { +		DRM_ERROR("ib test failed (0x%08X)\n", tmp); +		r = -EINVAL; +	} + +	dma_fence_put(fence); + +error: +	return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 773010b9ff15..0b0b8638d73f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,6 +66,7 @@ struct amdgpu_vcn {  	const struct firmware	*fw;	/* VCN firmware */  	struct amdgpu_ring	ring_dec;  	struct amdgpu_ring	ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; +	struct amdgpu_ring	ring_jpeg;  	struct amdgpu_irq_src	irq;  	unsigned		num_enc_rings;  }; @@ -83,4 +84,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);  int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);  int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int 
amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout); +  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fdcb498f6d19..ece0ac703e27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -33,9 +33,11 @@  #include "amdgpu.h"  #include "amdgpu_trace.h"  #include "amdgpu_amdkfd.h" +#include "amdgpu_gmc.h" -/* - * GPUVM +/** + * DOC: GPUVM + *   * GPUVM is similar to the legacy gart on older asics, however   * rather than there being a single global gart table   * for the entire GPU, there are multiple VM page tables active @@ -63,37 +65,84 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,  #undef START  #undef LAST -/* Local structure. Encapsulate some VM table update parameters to reduce +/** + * struct amdgpu_pte_update_params - Local structure + * + * Encapsulate some VM table update parameters to reduce   * the number of function parameters + *   */  struct amdgpu_pte_update_params { -	/* amdgpu device we do this update for */ + +	/** +	 * @adev: amdgpu device we do this update for +	 */  	struct amdgpu_device *adev; -	/* optional amdgpu_vm we do this update for */ + +	/** +	 * @vm: optional amdgpu_vm we do this update for +	 */  	struct amdgpu_vm *vm; -	/* address where to copy page table entries from */ + +	/** +	 * @src: address where to copy page table entries from +	 */  	uint64_t src; -	/* indirect buffer to fill with commands */ + +	/** +	 * @ib: indirect buffer to fill with commands +	 */  	struct amdgpu_ib *ib; -	/* Function which actually does the update */ + +	/** +	 * @func: Function which actually does the update +	 */  	void (*func)(struct amdgpu_pte_update_params *params,  		     struct amdgpu_bo *bo, uint64_t pe,  		     uint64_t addr, unsigned count, uint32_t incr,  		     uint64_t flags); -	/* The next two are used during VM update by CPU -	 *  DMA addresses to use for mapping -	 *  Kernel pointer of PD/PT BO that needs to be updated +	/** +	 * @pages_addr: +	 * +	 * DMA addresses to use for mapping, used during VM update by CPU  	 */  	dma_addr_t *pages_addr; + +	/** +	 * @kptr: +	 * +	 * Kernel pointer of PD/PT BO that needs to be updated, +	 * used during VM update by CPU +	 */  	void *kptr;  }; -/* Helper to disable partial resident texture feature from a fence callback */ +/** + * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback + */  struct amdgpu_prt_cb { + +	/** +	 * @adev: amdgpu device +	 */  	struct amdgpu_device *adev; + +	/** +	 * @cb: callback +	 */  	struct dma_fence_cb cb;  }; +/** + * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm + * + * @base: base structure for tracking BO usage in a VM + * @vm: vm to which bo is to be added + * @bo: amdgpu buffer object + * + * Initialize a bo_va_base structure and add it to the appropriate lists + * + */  static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,  				   struct amdgpu_vm *vm,  				   struct amdgpu_bo *bo) @@ -129,8 +178,10 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,   * amdgpu_vm_level_shift - return the addr shift for each level   *   * @adev: amdgpu_device pointer + * @level: VMPT level   * - * Returns the number of bits the pfn needs to be right shifted for a level. + * Returns: + * The number of bits the pfn needs to be right shifted for a level.   
*/  static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,  				      unsigned level) @@ -158,8 +209,10 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,   * amdgpu_vm_num_entries - return the number of entries in a PD/PT   *   * @adev: amdgpu_device pointer + * @level: VMPT level   * - * Calculate the number of entries in a page directory or page table. + * Returns: + * The number of entries in a page directory or page table.   */  static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,  				      unsigned level) @@ -182,8 +235,10 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,   * amdgpu_vm_bo_size - returns the size of the BOs in bytes   *   * @adev: amdgpu_device pointer + * @level: VMPT level   * - * Calculate the size of the BO for a page directory or page table in bytes. + * Returns: + * The size of the BO for a page directory or page table in bytes.   */  static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)  { @@ -221,6 +276,9 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,   * @param: parameter for the validation callback   *   * Validate the page table BOs on command submission if neccessary. + * + * Returns: + * Validation result.   */  int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,  			      int (*validate)(void *p, struct amdgpu_bo *bo), @@ -276,6 +334,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,   * @vm: VM to check   *   * Check if all VM PDs/PTs are ready for updates + * + * Returns: + * True if eviction list is empty.   */  bool amdgpu_vm_ready(struct amdgpu_vm *vm)  { @@ -286,10 +347,15 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)   * amdgpu_vm_clear_bo - initially clear the PDs/PTs   *   * @adev: amdgpu_device pointer + * @vm: VM to clear BO from   * @bo: BO to clear   * @level: level this BO is at + * @pte_support_ats: indicate ATS support from PTE   *   * Root PD needs to be reserved when calling this. + * + * Returns: + * 0 on success, errno otherwise.   */  static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,  			      struct amdgpu_vm *vm, struct amdgpu_bo *bo, @@ -321,7 +387,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,  		ats_entries = 0;  	} -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); +	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);  	r = reservation_object_reserve_shared(bo->tbo.resv);  	if (r) @@ -359,8 +425,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,  	if (r)  		goto error_free; -	r = amdgpu_job_submit(job, ring, &vm->entity, -			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence); +	r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED, +			      &fence);  	if (r)  		goto error_free; @@ -385,10 +451,16 @@ error:   *   * @adev: amdgpu_device pointer   * @vm: requested vm + * @parent: parent PT   * @saddr: start of the address range   * @eaddr: end of the address range + * @level: VMPT level + * @ats: indicate ATS support from PTE   *   * Make sure the page directories and page tables are allocated + * + * Returns: + * 0 on success, errno otherwise.   
*/  static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,  				  struct amdgpu_vm *vm, @@ -423,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,  	eaddr = eaddr & ((1 << shift) - 1);  	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; +	if (vm->root.base.bo->shadow) +		flags |= AMDGPU_GEM_CREATE_SHADOW;  	if (vm->use_cpu_for_update)  		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;  	else -		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | -				AMDGPU_GEM_CREATE_SHADOW); +		flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;  	/* walk over the address space and allocate the page tables */  	for (pt_idx = from; pt_idx <= to; ++pt_idx) { @@ -496,6 +569,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,   * @size: Size from start address we need.   *   * Make sure the page tables are allocated. + * + * Returns: + * 0 on success, errno otherwise.   */  int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,  			struct amdgpu_vm *vm, @@ -561,6 +637,15 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)  	}  } +/** + * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job. + * + * @ring: ring on which the job will be submitted + * @job: job to submit + * + * Returns: + * True if sync is needed. + */  bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,  				  struct amdgpu_job *job)  { @@ -588,19 +673,17 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,  	return vm_flush_needed || gds_switch_needed;  } -static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) -{ -	return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size); -} -  /**   * amdgpu_vm_flush - hardware flush the vm   *   * @ring: ring to use for flush - * @vmid: vmid number to use - * @pd_addr: address of the page directory + * @job:  related job + * @need_pipe_sync: is pipe sync needed   *   * Emit a VM flush when it is necessary. + * + * Returns: + * 0 on success, errno otherwise.   */  int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)  { @@ -708,6 +791,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_   * Returns the found bo_va or NULL if none is found   *   * Object has to be reserved! + * + * Returns: + * Found bo_va or NULL.   */  struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,  				       struct amdgpu_bo *bo) @@ -789,7 +875,10 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,   * @addr: the unmapped addr   *   * Look up the physical address of the page that the pte resolves - * to and return the pointer for the page table entry. + * to. + * + * Returns: + * The pointer for the page table entry.   */  static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)  { @@ -842,6 +931,17 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,  	}  } + +/** + * amdgpu_vm_wait_pd - Wait for PT BOs to be free. + * + * @adev: amdgpu_device pointer + * @vm: related vm + * @owner: fence owner + * + * Returns: + * 0 on success, errno otherwise. + */  static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,  			     void *owner)  { @@ -895,7 +995,10 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,  /*   * amdgpu_vm_invalidate_level - mark all PD levels as invalid   * + * @adev: amdgpu_device pointer + * @vm: related vm   * @parent: parent PD + * @level: VMPT level   *   * Mark all PD level as invalid after an error.   
*/ @@ -930,7 +1033,9 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,   * @vm: requested vm   *   * Makes sure all directories are up to date. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure.   */  int amdgpu_vm_update_directories(struct amdgpu_device *adev,  				 struct amdgpu_vm *vm) @@ -980,7 +1085,7 @@ restart:  					   struct amdgpu_vm_bo_base,  					   vm_status);  		bo_base->moved = false; -		list_move(&bo_base->vm_status, &vm->idle); +		list_del_init(&bo_base->vm_status);  		bo = bo_base->bo->parent;  		if (!bo) @@ -1009,15 +1114,15 @@ restart:  		struct amdgpu_ring *ring;  		struct dma_fence *fence; -		ring = container_of(vm->entity.sched, struct amdgpu_ring, +		ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,  				    sched);  		amdgpu_ring_pad_ib(ring, params.ib);  		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,  				 AMDGPU_FENCE_OWNER_VM, false);  		WARN_ON(params.ib->length_dw > ndw); -		r = amdgpu_job_submit(job, ring, &vm->entity, -				      AMDGPU_FENCE_OWNER_VM, &fence); +		r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, +				      &fence);  		if (r)  			goto error; @@ -1117,14 +1222,15 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,   * amdgpu_vm_update_ptes - make sure that page tables are valid   *   * @params: see amdgpu_pte_update_params definition - * @vm: requested vm   * @start: start of GPU address range   * @end: end of GPU address range   * @dst: destination address to map to, the next dst inside the function   * @flags: mapping flags   *   * Update the page tables in the range @start - @end. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure.   */  static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,  				  uint64_t start, uint64_t end, @@ -1178,7 +1284,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,   * @end: last PTE to handle   * @dst: addr those PTEs should point to   * @flags: hw mapping flags - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure.   */  static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,  				uint64_t start, uint64_t end, @@ -1250,7 +1358,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,   * @fence: optional resulting fence   *   * Fill in the page table entries between @start and @last. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure.   
*/  static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  				       struct dma_fence *exclusive, @@ -1294,7 +1404,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  					   addr, flags);  	} -	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); +	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);  	nptes = last - start + 1; @@ -1326,7 +1436,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  		ndw += ncmds * 10;  		/* extra commands for begin/end fragments */ -		ndw += 2 * 10 * adev->vm_manager.fragment_size; +		if (vm->root.base.bo->shadow) +		        ndw += 2 * 10 * adev->vm_manager.fragment_size * 2; +		else +		        ndw += 2 * 10 * adev->vm_manager.fragment_size;  		params.func = amdgpu_vm_do_set_ptes;  	} @@ -1373,8 +1486,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,  	amdgpu_ring_pad_ib(ring, params.ib);  	WARN_ON(params.ib->length_dw > ndw); -	r = amdgpu_job_submit(job, ring, &vm->entity, -			      AMDGPU_FENCE_OWNER_VM, &f); +	r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f);  	if (r)  		goto error_free; @@ -1402,7 +1514,9 @@ error_free:   *   * Split the mapping into smaller chunks so that each update fits   * into a SDMA IB. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure.   */  static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  				      struct dma_fence *exclusive, @@ -1455,7 +1569,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  		if (nodes) {  			addr = nodes->start << PAGE_SHIFT;  			max_entries = (nodes->size - pfn) * -				(PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); +				AMDGPU_GPU_PAGES_IN_CPU_PAGE;  		} else {  			addr = 0;  			max_entries = S64_MAX; @@ -1466,7 +1580,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  			max_entries = min(max_entries, 16ull * 1024ull);  			for (count = 1; -			     count < max_entries / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); +			     count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;  			     ++count) {  				uint64_t idx = pfn + count; @@ -1480,7 +1594,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  				dma_addr = pages_addr;  			} else {  				addr = pages_addr[pfn]; -				max_entries = count * (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); +				max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;  			}  		} else if (flags & AMDGPU_PTE_VALID) { @@ -1495,7 +1609,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,  		if (r)  			return r; -		pfn += (last - start + 1) / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); +		pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;  		if (nodes && nodes->size == pfn) {  			pfn = 0;  			++nodes; @@ -1515,7 +1629,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,   * @clear: if true clear the entries   *   * Fill in the page table entries for @bo_va. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure.   
*/  int amdgpu_vm_bo_update(struct amdgpu_device *adev,  			struct amdgpu_bo_va *bo_va, @@ -1531,18 +1647,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,  	uint64_t flags;  	int r; -	if (clear || !bo_va->base.bo) { +	if (clear || !bo) {  		mem = NULL;  		nodes = NULL;  		exclusive = NULL;  	} else {  		struct ttm_dma_tt *ttm; -		mem = &bo_va->base.bo->tbo.mem; +		mem = &bo->tbo.mem;  		nodes = mem->mm_node;  		if (mem->mem_type == TTM_PL_TT) { -			ttm = container_of(bo_va->base.bo->tbo.ttm, -					   struct ttm_dma_tt, ttm); +			ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);  			pages_addr = ttm->dma_address;  		}  		exclusive = reservation_object_get_excl(bo->tbo.resv); @@ -1610,6 +1725,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,  /**   * amdgpu_vm_update_prt_state - update the global PRT state + * + * @adev: amdgpu_device pointer   */  static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)  { @@ -1624,6 +1741,8 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)  /**   * amdgpu_vm_prt_get - add a PRT user + * + * @adev: amdgpu_device pointer   */  static void amdgpu_vm_prt_get(struct amdgpu_device *adev)  { @@ -1636,6 +1755,8 @@ static void amdgpu_vm_prt_get(struct amdgpu_device *adev)  /**   * amdgpu_vm_prt_put - drop a PRT user + * + * @adev: amdgpu_device pointer   */  static void amdgpu_vm_prt_put(struct amdgpu_device *adev)  { @@ -1645,6 +1766,9 @@ static void amdgpu_vm_prt_put(struct amdgpu_device *adev)  /**   * amdgpu_vm_prt_cb - callback for updating the PRT status + * + * @fence: fence for the callback + * @_cb: the callback function   */  static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)  { @@ -1656,6 +1780,9 @@ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)  /**   * amdgpu_vm_add_prt_cb - add callback for updating the PRT status + * + * @adev: amdgpu_device pointer + * @fence: fence for the callback   */  static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,  				 struct dma_fence *fence) @@ -1747,9 +1874,11 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)   * or if an error occurred)   *   * Make sure all freed BOs are cleared in the PT. - * Returns 0 for success. - *   * PTs have to be reserved and mutex must be locked! + * + * Returns: + * 0 for success. + *   */  int amdgpu_vm_clear_freed(struct amdgpu_device *adev,  			  struct amdgpu_vm *vm, @@ -1794,10 +1923,11 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,   *   * @adev: amdgpu_device pointer   * @vm: requested vm - * @sync: sync object to add fences to   *   * Make sure all BOs which are moved are updated in the PTs. - * Returns 0 for success. + * + * Returns: + * 0 for success.   *   * PTs have to be reserved!   */ @@ -1852,7 +1982,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,   *   * Add @bo into the requested vm.   * Add @bo to the list of bos associated with the vm - * Returns newly added bo_va or NULL for failure + * + * Returns: + * Newly added bo_va or NULL for failure   *   * Object has to be reserved!   */ @@ -1915,10 +2047,13 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,   * @bo_va: bo_va to store the address   * @saddr: where to map the BO   * @offset: requested offset in the BO + * @size: BO size in bytes   * @flags: attributes of pages (read/write/valid/etc.)   *   * Add a mapping of the BO at the specefied addr into the VM. - * Returns 0 for success, error for failure. 
+ * + * Returns: + * 0 for success, error for failure.   *   * Object has to be reserved and unreserved outside!   */ @@ -1976,11 +2111,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,   * @bo_va: bo_va to store the address   * @saddr: where to map the BO   * @offset: requested offset in the BO + * @size: BO size in bytes   * @flags: attributes of pages (read/write/valid/etc.)   *   * Add a mapping of the BO at the specefied addr into the VM. Replace existing   * mappings as we do so. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure.   *   * Object has to be reserved and unreserved outside!   */ @@ -2037,7 +2175,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,   * @saddr: where to the BO is mapped   *   * Remove a mapping of the BO at the specefied addr from the VM. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure.   *   * Object has to be reserved and unreserved outside!   */ @@ -2091,7 +2231,9 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,   * @size: size of the range   *   * Remove all mappings in a range, split them as appropriate. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure.   */  int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,  				struct amdgpu_vm *vm, @@ -2188,8 +2330,13 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,   * amdgpu_vm_bo_lookup_mapping - find mapping by address   *   * @vm: the requested VM + * @addr: the address   *   * Find a mapping by it's address. + * + * Returns: + * The amdgpu_bo_va_mapping matching for addr or NULL + *   */  struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,  							 uint64_t addr) @@ -2198,6 +2345,35 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,  }  /** + * amdgpu_vm_bo_trace_cs - trace all reserved mappings + * + * @vm: the requested vm + * @ticket: CS ticket + * + * Trace all mappings of BOs reserved during a command submission. + */ +void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) +{ +	struct amdgpu_bo_va_mapping *mapping; + +	if (!trace_amdgpu_vm_bo_cs_enabled()) +		return; + +	for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping; +	     mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) { +		if (mapping->bo_va && mapping->bo_va->base.bo) { +			struct amdgpu_bo *bo; + +			bo = mapping->bo_va->base.bo; +			if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket) +				continue; +		} + +		trace_amdgpu_vm_bo_cs(mapping); +	} +} + +/**   * amdgpu_vm_bo_rmv - remove a bo to a specific vm   *   * @adev: amdgpu_device pointer @@ -2241,8 +2417,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,   * amdgpu_vm_bo_invalidate - mark the bo as invalid   *   * @adev: amdgpu_device pointer - * @vm: requested vm   * @bo: amdgpu buffer object + * @evicted: is the BO evicted   *   * Mark @bo as invalid.   
*/ @@ -2282,6 +2458,14 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,  	}  } +/** + * amdgpu_vm_get_block_size - calculate VM page table size as power of two + * + * @vm_size: VM size + * + * Returns: + * VM page table as power of two + */  static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)  {  	/* Total bits covered by PD + PTs */ @@ -2300,6 +2484,10 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)   *   * @adev: amdgpu_device pointer   * @vm_size: the default vm size if it's set auto + * @fragment_size_default: Default PTE fragment size + * @max_level: max VMPT level + * @max_bits: max address space size in bits + *   */  void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,  			   uint32_t fragment_size_default, unsigned max_level, @@ -2367,8 +2555,12 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,   * @adev: amdgpu_device pointer   * @vm: requested vm   * @vm_context: Indicates if it GFX or Compute context + * @pasid: Process address space identifier   *   * Init @vm fields. + * + * Returns: + * 0 for success, error for failure.   */  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,  		   int vm_context, unsigned int pasid) @@ -2400,8 +2592,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,  	ring_instance %= adev->vm_manager.vm_pte_num_rings;  	ring = adev->vm_manager.vm_pte_rings[ring_instance];  	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; -	r = drm_sched_entity_init(&ring->sched, &vm->entity, -				  rq, NULL); +	r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);  	if (r)  		return r; @@ -2419,14 +2610,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,  	}  	DRM_DEBUG_DRIVER("VM update mode is %s\n",  			 vm->use_cpu_for_update ? "CPU" : "SDMA"); -	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), +	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),  		  "CPU update of VM recommended only for large BAR system\n");  	vm->last_update = NULL;  	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;  	if (vm->use_cpu_for_update)  		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; -	else +	else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)  		flags |= AMDGPU_GEM_CREATE_SHADOW;  	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); @@ -2481,7 +2672,7 @@ error_free_root:  	vm->root.base.bo = NULL;  error_free_sched_entity: -	drm_sched_entity_fini(&ring->sched, &vm->entity); +	drm_sched_entity_destroy(&vm->entity);  	return r;  } @@ -2489,6 +2680,9 @@ error_free_sched_entity:  /**   * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM   * + * @adev: amdgpu_device pointer + * @vm: requested vm + *   * This only works on GFX VMs that don't have any BOs added and no   * page tables allocated yet.   * @@ -2498,10 +2692,10 @@ error_free_sched_entity:   * - pasid (old PASID is released, because compute manages its own PASIDs)   *   * Reinitializes the page directory to reflect the changed ATS - * setting. May leave behind an unused shadow BO for the page - * directory when switching from SDMA updates to CPU updates. + * setting.   * - * Returns 0 for success, -errno for errors. + * Returns: + * 0 for success, -errno for errors.   
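The amdgpu_vm_init() and error-path hunks above also show the reworked scheduler-entity API: drm_sched_entity_init() now takes a pointer to an array of run queues plus a count rather than a scheduler/rq pair, and teardown goes through drm_sched_entity_destroy() without a scheduler argument. A minimal sketch of that pairing, with hypothetical wrapper names:

static int example_entity_create(struct amdgpu_ring *ring,
				 struct drm_sched_entity *entity)
{
	/* single run queue at kernel priority, as in amdgpu_vm_init() above */
	struct drm_sched_rq *rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];

	return drm_sched_entity_init(entity, &rq, 1, NULL);
}

static void example_entity_destroy(struct drm_sched_entity *entity)
{
	/* no scheduler pointer is needed any more at teardown */
	drm_sched_entity_destroy(entity);
}
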
*/  int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)  { @@ -2535,7 +2729,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)  	vm->pte_support_ats = pte_support_ats;  	DRM_DEBUG_DRIVER("VM update mode is %s\n",  			 vm->use_cpu_for_update ? "CPU" : "SDMA"); -	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), +	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),  		  "CPU update of VM recommended only for large BAR system\n");  	if (vm->pasid) { @@ -2548,6 +2742,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)  		vm->pasid = 0;  	} +	/* Free the shadow bo for compute VM */ +	amdgpu_bo_unref(&vm->root.base.bo->shadow); +  error:  	amdgpu_bo_unreserve(vm->root.base.bo);  	return r; @@ -2614,7 +2811,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)  		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);  	} -	drm_sched_entity_fini(vm->entity.sched, &vm->entity); +	drm_sched_entity_destroy(&vm->entity);  	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {  		dev_err(adev->dev, "still active bo inside vm\n"); @@ -2656,8 +2853,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)   * @adev: amdgpu_device pointer   * @pasid: PASID do identify the VM   * - * This function is expected to be called in interrupt context. Returns - * true if there was fault credit, false otherwise + * This function is expected to be called in interrupt context. + * + * Returns: + * True if there was fault credit, false otherwise  */  bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,  				  unsigned int pasid) @@ -2711,7 +2910,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)  	 */  #ifdef CONFIG_X86_64  	if (amdgpu_vm_update_mode == -1) { -		if (amdgpu_vm_is_large_bar(adev)) +		if (amdgpu_gmc_vram_full_visible(&adev->gmc))  			adev->vm_manager.vm_update_mode =  				AMDGPU_VM_USE_CPU_FOR_COMPUTE;  		else @@ -2741,6 +2940,16 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)  	amdgpu_vmid_mgr_fini(adev);  } +/** + * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs. + * + * @dev: drm device pointer + * @data: drm_amdgpu_vm + * @filp: drm file pointer + * + * Returns: + * 0 for success, -errno for errors. + */  int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  {  	union drm_amdgpu_vm *args = data; @@ -2764,3 +2973,42 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  	return 0;  } + +/** + * amdgpu_vm_get_task_info - Extracts task info for a PASID. + * + * @adev: amdgpu_device pointer + * @pasid: PASID identifier for VM + * @task_info: task_info to fill. + */ +void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, +			 struct amdgpu_task_info *task_info) +{ +	struct amdgpu_vm *vm; + +	spin_lock(&adev->vm_manager.pasid_lock); + +	vm = idr_find(&adev->vm_manager.pasid_idr, pasid); +	if (vm) +		*task_info = vm->task_info; + +	spin_unlock(&adev->vm_manager.pasid_lock); +} + +/** + * amdgpu_vm_set_task_info - Sets the VM's task info. 
+ * + * @vm: vm for which to set the info + */ +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) +{ +	if (!vm->task_info.pid) { +		vm->task_info.pid = current->pid; +		get_task_comm(vm->task_info.task_name, current); + +		if (current->group_leader->mm == current->mm) { +			vm->task_info.tgid = current->group_leader->pid; +			get_task_comm(vm->task_info.process_name, current->group_leader); +		} +	} +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 061b99a18cb8..67a15d439ac0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -164,6 +164,14 @@ struct amdgpu_vm_pt {  #define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)  #define AMDGPU_VM_FAULT_ADDR(fault)  ((u64)(fault) & 0xfffffffff000ULL) + +struct amdgpu_task_info { +	char	process_name[TASK_COMM_LEN]; +	char	task_name[TASK_COMM_LEN]; +	pid_t	pid; +	pid_t	tgid; +}; +  struct amdgpu_vm {  	/* tree of virtual addresses mapped */  	struct rb_root_cached	va; @@ -215,6 +223,9 @@ struct amdgpu_vm {  	/* Valid while the PD is reserved or fenced */  	uint64_t		pd_phys_addr; + +	/* Some basic info about the task */ +	struct amdgpu_task_info task_info;  };  struct amdgpu_vm_manager { @@ -307,6 +318,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,  				uint64_t saddr, uint64_t size);  struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,  							 uint64_t addr); +void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);  void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,  		      struct amdgpu_bo_va *bo_va);  void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, @@ -317,4 +329,9 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,  				  struct amdgpu_job *job);  void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); +void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, +			 struct amdgpu_task_info *task_info); + +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); +  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index b6333f92ba45..9cfa8a9ada92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -97,33 +97,29 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,  }  /** - * amdgpu_vram_mgr_bo_invisible_size - CPU invisible BO size + * amdgpu_vram_mgr_bo_visible_size - CPU visible BO size   *   * @bo: &amdgpu_bo buffer object (must be in VRAM)   *   * Returns: - * How much of the given &amdgpu_bo buffer object lies in CPU invisible VRAM. + * How much of the given &amdgpu_bo buffer object lies in CPU visible VRAM.   
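The task-info helpers above are meant to be used as a pair: amdgpu_vm_set_task_info() samples "current" on the submission side (its implementation above copies the pid, tgid and comm strings), and a fault or hang handler can later translate a PASID back into that process. A sketch of the consumer side, with a hypothetical helper name:

static void example_report_vm_fault(struct amdgpu_device *adev,
				    unsigned int pasid, u64 addr)
{
	struct amdgpu_task_info task_info = {};

	amdgpu_vm_get_task_info(adev, pasid, &task_info);
	dev_err(adev->dev,
		"VM fault at 0x%llx, process %s pid %d thread %s\n",
		addr, task_info.process_name, task_info.pid,
		task_info.task_name);
}
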
*/ -u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo) +u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);  	struct ttm_mem_reg *mem = &bo->tbo.mem;  	struct drm_mm_node *nodes = mem->mm_node;  	unsigned pages = mem->num_pages; -	u64 usage = 0; +	u64 usage; -	if (adev->gmc.visible_vram_size == adev->gmc.real_vram_size) -		return 0; +	if (amdgpu_gmc_vram_full_visible(&adev->gmc)) +		return amdgpu_bo_size(bo);  	if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) -		return amdgpu_bo_size(bo); +		return 0; -	while (nodes && pages) { -		usage += nodes->size << PAGE_SHIFT; -		usage -= amdgpu_vram_mgr_vis_size(adev, nodes); -		pages -= nodes->size; -		++nodes; -	} +	for (usage = 0; nodes && pages; pages -= nodes->size, nodes++) +		usage += amdgpu_vram_mgr_vis_size(adev, nodes);  	return usage;  } diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 7fbad2f5f0bd..d2469453dca2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -49,10 +49,10 @@  #include "gmc/gmc_7_1_d.h"  #include "gmc/gmc_7_1_sh_mask.h" -MODULE_FIRMWARE("radeon/bonaire_smc.bin"); -MODULE_FIRMWARE("radeon/bonaire_k_smc.bin"); -MODULE_FIRMWARE("radeon/hawaii_smc.bin"); -MODULE_FIRMWARE("radeon/hawaii_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_smc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_smc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin");  #define MC_CG_ARB_FREQ_F0           0x0a  #define MC_CG_ARB_FREQ_F1           0x0b @@ -951,12 +951,12 @@ static void ci_apply_state_adjust_rules(struct amdgpu_device *adev,  	else  		pi->battery_state = false; -	if (adev->pm.dpm.ac_power) +	if (adev->pm.ac_power)  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;  	else  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; -	if (adev->pm.dpm.ac_power == false) { +	if (adev->pm.ac_power == false) {  		for (i = 0; i < ps->performance_level_count; i++) {  			if (ps->performance_levels[i].mclk > max_limits->mclk)  				ps->performance_levels[i].mclk = max_limits->mclk; @@ -4078,7 +4078,7 @@ static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable)  	const struct amdgpu_clock_and_voltage_limits *max_limits;  	int i; -	if (adev->pm.dpm.ac_power) +	if (adev->pm.ac_power)  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;  	else  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4127,7 +4127,7 @@ static int ci_enable_vce_dpm(struct amdgpu_device *adev, bool enable)  	const struct amdgpu_clock_and_voltage_limits *max_limits;  	int i; -	if (adev->pm.dpm.ac_power) +	if (adev->pm.ac_power)  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;  	else  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4160,7 +4160,7 @@ static int ci_enable_samu_dpm(struct amdgpu_device *adev, bool enable)  	const struct amdgpu_clock_and_voltage_limits *max_limits;  	int i; -	if (adev->pm.dpm.ac_power) +	if (adev->pm.ac_power)  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;  	else  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4191,7 +4191,7 @@ static int ci_enable_acp_dpm(struct amdgpu_device *adev, bool enable)  	const struct amdgpu_clock_and_voltage_limits *max_limits;  	int i; -	if (adev->pm.dpm.ac_power) +	if (adev->pm.ac_power)  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;  	else  		max_limits = 
&adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -5815,7 +5815,7 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)  	default: BUG();  	} -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);  	err = request_firmware(&adev->pm.fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -5846,8 +5846,7 @@ static int ci_dpm_init(struct amdgpu_device *adev)  	adev->pm.dpm.priv = pi;  	pi->sys_pcie_mask = -		(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >> -		CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT; +		adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK;  	pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID; @@ -6767,6 +6766,19 @@ static int ci_dpm_read_sensor(void *handle, int idx,  	}  } +static int ci_set_powergating_by_smu(void *handle, +				uint32_t block_type, bool gate) +{ +	switch (block_type) { +	case AMD_IP_BLOCK_TYPE_UVD: +		ci_dpm_powergate_uvd(handle, gate); +		break; +	default: +		break; +	} +	return 0; +} +  static const struct amd_ip_funcs ci_dpm_ip_funcs = {  	.name = "ci_dpm",  	.early_init = ci_dpm_early_init, @@ -6804,7 +6816,7 @@ static const struct amd_pm_funcs ci_dpm_funcs = {  	.debugfs_print_current_performance_level = &ci_dpm_debugfs_print_current_performance_level,  	.force_performance_level = &ci_dpm_force_performance_level,  	.vblank_too_short = &ci_dpm_vblank_too_short, -	.powergate_uvd = &ci_dpm_powergate_uvd, +	.set_powergating_by_smu = &ci_set_powergating_by_smu,  	.set_fan_control_mode = &ci_dpm_set_fan_control_mode,  	.get_fan_control_mode = &ci_dpm_get_fan_control_mode,  	.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 8ff4c60d1b59..78ab939ae5d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1476,7 +1476,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)  				tmp |= PCIE_LC_CNTL4__LC_REDO_EQ_MASK;  				WREG32_PCIE(ixPCIE_LC_CNTL4, tmp); -				mdelay(100); +				msleep(100);  				/* linkctl */  				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16); @@ -2003,9 +2003,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)  		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);  		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);  		if (amdgpu_dpm == -1) -			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); -		else  			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); +		else +			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);  		if (adev->enable_virtual_display)  			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);  #if defined(CONFIG_DRM_AMD_DC) @@ -2024,9 +2024,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)  		amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);  		amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);  		if (amdgpu_dpm == -1) -			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); -		else  			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); +		else +			amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);  		if (adev->enable_virtual_display)  			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);  #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index a7576255cc30..d0fa2aac2388 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -54,16 +54,16 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);  static void 
cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);  static int cik_sdma_soft_reset(void *handle); -MODULE_FIRMWARE("radeon/bonaire_sdma.bin"); -MODULE_FIRMWARE("radeon/bonaire_sdma1.bin"); -MODULE_FIRMWARE("radeon/hawaii_sdma.bin"); -MODULE_FIRMWARE("radeon/hawaii_sdma1.bin"); -MODULE_FIRMWARE("radeon/kaveri_sdma.bin"); -MODULE_FIRMWARE("radeon/kaveri_sdma1.bin"); -MODULE_FIRMWARE("radeon/kabini_sdma.bin"); -MODULE_FIRMWARE("radeon/kabini_sdma1.bin"); -MODULE_FIRMWARE("radeon/mullins_sdma.bin"); -MODULE_FIRMWARE("radeon/mullins_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_sdma.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_sdma.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_sdma.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/kabini_sdma.bin"); +MODULE_FIRMWARE("amdgpu/kabini_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/mullins_sdma.bin"); +MODULE_FIRMWARE("amdgpu/mullins_sdma1.bin");  u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); @@ -132,9 +132,9 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)  	for (i = 0; i < adev->sdma.num_instances; i++) {  		if (i == 0) -			snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name); +			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);  		else -			snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma1.bin", chip_name); +			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name);  		err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev);  		if (err)  			goto out; @@ -177,9 +177,8 @@ static uint64_t cik_sdma_ring_get_rptr(struct amdgpu_ring *ring)  static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; -	return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2; +	return (RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) & 0x3fffc) >> 2;  }  /** @@ -192,9 +191,8 @@ static uint64_t cik_sdma_ring_get_wptr(struct amdgpu_ring *ring)  static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	u32 me = (ring == &adev->sdma.instance[0].ring) ? 
0 : 1; -	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], +	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],  		       	(lower_32_bits(ring->wptr) << 2) & 0x3fffc);  } @@ -248,7 +246,7 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)  			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */  	u32 ref_and_mask; -	if (ring == &ring->adev->sdma.instance[0].ring) +	if (ring->me == 0)  		ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA0_MASK;  	else  		ref_and_mask = GPU_HDP_FLUSH_DONE__SDMA1_MASK; @@ -1290,8 +1288,10 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)  {  	int i; -	for (i = 0; i < adev->sdma.num_instances; i++) +	for (i = 0; i < adev->sdma.num_instances; i++) {  		adev->sdma.instance[i].ring.funcs = &cik_sdma_ring_funcs; +		adev->sdma.instance[i].ring.me = i; +	}  }  static const struct amdgpu_irq_src_funcs cik_sdma_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index ada241bfeee9..308f9f238bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -41,6 +41,8 @@  #include "gmc/gmc_8_1_d.h"  #include "gmc/gmc_8_1_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h" +  static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev);  static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1855,15 +1857,14 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,  	if (unlikely(r != 0))  		return r; -	if (atomic) { -		fb_location = amdgpu_bo_gpu_offset(abo); -	} else { -		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); +	if (!atomic) { +		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);  		if (unlikely(r != 0)) {  			amdgpu_bo_unreserve(abo);  			return -EINVAL;  		}  	} +	fb_location = amdgpu_bo_gpu_offset(abo);  	amdgpu_bo_get_tiling_flags(abo, &tiling_flags);  	amdgpu_bo_unreserve(abo); @@ -2370,13 +2371,14 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc,  		return ret;  	} -	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr); +	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);  	amdgpu_bo_unreserve(aobj);  	if (ret) {  		DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);  		drm_gem_object_put_unlocked(obj);  		return ret;  	} +	amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);  	dce_v10_0_lock_cursor(crtc, true); @@ -2737,14 +2739,14 @@ static int dce_v10_0_sw_init(void *handle)  			return r;  	} -	for (i = 8; i < 20; i += 2) { +	for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {  		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);  		if (r)  			return r;  	}  	/* HPD hotplug */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);  	if (r)  		return r; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index a5b96eac3033..76dfb76f7900 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -41,6 +41,8 @@  #include "gmc/gmc_8_1_d.h"  #include "gmc/gmc_8_1_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h" +  static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev);  static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1897,15 +1899,14 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,  	if (unlikely(r != 0))  		return r; -	if (atomic) { -		fb_location = 
amdgpu_bo_gpu_offset(abo); -	} else { -		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); +	if (!atomic) { +		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);  		if (unlikely(r != 0)) {  			amdgpu_bo_unreserve(abo);  			return -EINVAL;  		}  	} +	fb_location = amdgpu_bo_gpu_offset(abo);  	amdgpu_bo_get_tiling_flags(abo, &tiling_flags);  	amdgpu_bo_unreserve(abo); @@ -2449,13 +2450,14 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc,  		return ret;  	} -	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr); +	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);  	amdgpu_bo_unreserve(aobj);  	if (ret) {  		DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);  		drm_gem_object_put_unlocked(obj);  		return ret;  	} +	amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);  	dce_v11_0_lock_cursor(crtc, true); @@ -2858,14 +2860,14 @@ static int dce_v11_0_sw_init(void *handle)  			return r;  	} -	for (i = 8; i < 20; i += 2) { +	for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) {  		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq);  		if (r)  			return r;  	}  	/* HPD hotplug */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq);  	if (r)  		return r; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 394cc1e8fe20..c9adc627305d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -1811,15 +1811,14 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc,  	if (unlikely(r != 0))  		return r; -	if (atomic) { -		fb_location = amdgpu_bo_gpu_offset(abo); -	} else { -		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); +	if (!atomic) { +		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);  		if (unlikely(r != 0)) {  			amdgpu_bo_unreserve(abo);  			return -EINVAL;  		}  	} +	fb_location = amdgpu_bo_gpu_offset(abo);  	amdgpu_bo_get_tiling_flags(abo, &tiling_flags);  	amdgpu_bo_unreserve(abo); @@ -2263,13 +2262,14 @@ static int dce_v6_0_crtc_cursor_set2(struct drm_crtc *crtc,  		return ret;  	} -	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, &amdgpu_crtc->cursor_addr); +	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);  	amdgpu_bo_unreserve(aobj);  	if (ret) {  		DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);  		drm_gem_object_put_unlocked(obj);  		return ret;  	} +	amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);  	dce_v6_0_lock_cursor(crtc, true); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index c9b9ab8f1b05..50cd03beac7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1786,15 +1786,14 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,  	if (unlikely(r != 0))  		return r; -	if (atomic) { -		fb_location = amdgpu_bo_gpu_offset(abo); -	} else { -		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location); +	if (!atomic) { +		r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);  		if (unlikely(r != 0)) {  			amdgpu_bo_unreserve(abo);  			return -EINVAL;  		}  	} +	fb_location = amdgpu_bo_gpu_offset(abo);  	amdgpu_bo_get_tiling_flags(abo, &tiling_flags);  	amdgpu_bo_unreserve(abo); @@ -2274,13 +2273,14 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc,  		return ret;  	} -	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM, 
&amdgpu_crtc->cursor_addr); +	ret = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);  	amdgpu_bo_unreserve(aobj);  	if (ret) {  		DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret);  		drm_gem_object_put_unlocked(obj);  		return ret;  	} +	amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);  	dce_v8_0_lock_cursor(crtc, true); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index dbf2ccd0c744..15257634a53a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -36,6 +36,7 @@  #include "dce_v10_0.h"  #include "dce_v11_0.h"  #include "dce_virtual.h" +#include "ivsrcid/ivsrcid_vislands30.h"  #define DCE_VIRTUAL_VBLANK_PERIOD 16666666 @@ -269,25 +270,18 @@ static int dce_virtual_early_init(void *handle)  static struct drm_encoder *  dce_virtual_encoder(struct drm_connector *connector)  { -	int enc_id = connector->encoder_ids[0];  	struct drm_encoder *encoder;  	int i; -	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) { -		if (connector->encoder_ids[i] == 0) -			break; - -		encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]); -		if (!encoder) -			continue; - +	drm_connector_for_each_possible_encoder(connector, encoder, i) {  		if (encoder->encoder_type == DRM_MODE_ENCODER_VIRTUAL)  			return encoder;  	}  	/* pick the first one */ -	if (enc_id) -		return drm_encoder_find(connector->dev, NULL, enc_id); +	drm_connector_for_each_possible_encoder(connector, encoder, i) +		return encoder; +  	return NULL;  } @@ -378,7 +372,7 @@ static int dce_virtual_sw_init(void *handle)  	int r, i;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 229, &adev->crtc_irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq);  	if (r)  		return r; @@ -634,7 +628,7 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,  	drm_connector_register(connector);  	/* link them */ -	drm_mode_connector_attach_encoder(connector, encoder); +	drm_connector_attach_encoder(connector, encoder);  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index cd6bf291a853..de184a886057 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -44,30 +44,30 @@ static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);  static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);  static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev); -MODULE_FIRMWARE("radeon/tahiti_pfp.bin"); -MODULE_FIRMWARE("radeon/tahiti_me.bin"); -MODULE_FIRMWARE("radeon/tahiti_ce.bin"); -MODULE_FIRMWARE("radeon/tahiti_rlc.bin"); - -MODULE_FIRMWARE("radeon/pitcairn_pfp.bin"); -MODULE_FIRMWARE("radeon/pitcairn_me.bin"); -MODULE_FIRMWARE("radeon/pitcairn_ce.bin"); -MODULE_FIRMWARE("radeon/pitcairn_rlc.bin"); - -MODULE_FIRMWARE("radeon/verde_pfp.bin"); -MODULE_FIRMWARE("radeon/verde_me.bin"); -MODULE_FIRMWARE("radeon/verde_ce.bin"); -MODULE_FIRMWARE("radeon/verde_rlc.bin"); - -MODULE_FIRMWARE("radeon/oland_pfp.bin"); -MODULE_FIRMWARE("radeon/oland_me.bin"); -MODULE_FIRMWARE("radeon/oland_ce.bin"); -MODULE_FIRMWARE("radeon/oland_rlc.bin"); - -MODULE_FIRMWARE("radeon/hainan_pfp.bin"); -MODULE_FIRMWARE("radeon/hainan_me.bin"); -MODULE_FIRMWARE("radeon/hainan_ce.bin"); -MODULE_FIRMWARE("radeon/hainan_rlc.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_pfp.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_me.bin"); 
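The display hunks above all follow the same conversion: amdgpu_bo_pin() no longer returns the GPU address through an out parameter, so callers pin first and then read the offset with amdgpu_bo_gpu_offset(). A minimal sketch of the new idiom (hypothetical helper name, error handling reduced to the essentials):

static int example_pin_in_vram(struct amdgpu_bo *abo, u64 *gpu_addr)
{
	int r;

	r = amdgpu_bo_reserve(abo, false);
	if (r)
		return r;

	r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		*gpu_addr = amdgpu_bo_gpu_offset(abo);

	amdgpu_bo_unreserve(abo);
	return r;
}
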
+MODULE_FIRMWARE("amdgpu/tahiti_ce.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/pitcairn_pfp.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_me.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_ce.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/verde_pfp.bin"); +MODULE_FIRMWARE("amdgpu/verde_me.bin"); +MODULE_FIRMWARE("amdgpu/verde_ce.bin"); +MODULE_FIRMWARE("amdgpu/verde_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/oland_pfp.bin"); +MODULE_FIRMWARE("amdgpu/oland_me.bin"); +MODULE_FIRMWARE("amdgpu/oland_ce.bin"); +MODULE_FIRMWARE("amdgpu/oland_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/hainan_pfp.bin"); +MODULE_FIRMWARE("amdgpu/hainan_me.bin"); +MODULE_FIRMWARE("amdgpu/hainan_ce.bin"); +MODULE_FIRMWARE("amdgpu/hainan_rlc.bin");  static u32 gfx_v6_0_get_csb_size(struct amdgpu_device *adev);  static void gfx_v6_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); @@ -335,7 +335,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)  	default: BUG();  	} -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);  	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -346,7 +346,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)  	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);  	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);  	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -357,7 +357,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)  	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);  	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);  	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -368,7 +368,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)  	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);  	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);  	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);  	if (err)  		goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 42b6144c1fd5..95452c5a9df6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -57,36 +57,36 @@ static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);  static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);  static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); -MODULE_FIRMWARE("radeon/bonaire_pfp.bin"); -MODULE_FIRMWARE("radeon/bonaire_me.bin"); -MODULE_FIRMWARE("radeon/bonaire_ce.bin"); -MODULE_FIRMWARE("radeon/bonaire_rlc.bin"); -MODULE_FIRMWARE("radeon/bonaire_mec.bin"); - -MODULE_FIRMWARE("radeon/hawaii_pfp.bin"); -MODULE_FIRMWARE("radeon/hawaii_me.bin"); -MODULE_FIRMWARE("radeon/hawaii_ce.bin"); -MODULE_FIRMWARE("radeon/hawaii_rlc.bin"); -MODULE_FIRMWARE("radeon/hawaii_mec.bin"); - -MODULE_FIRMWARE("radeon/kaveri_pfp.bin"); 
-MODULE_FIRMWARE("radeon/kaveri_me.bin"); -MODULE_FIRMWARE("radeon/kaveri_ce.bin"); -MODULE_FIRMWARE("radeon/kaveri_rlc.bin"); -MODULE_FIRMWARE("radeon/kaveri_mec.bin"); -MODULE_FIRMWARE("radeon/kaveri_mec2.bin"); - -MODULE_FIRMWARE("radeon/kabini_pfp.bin"); -MODULE_FIRMWARE("radeon/kabini_me.bin"); -MODULE_FIRMWARE("radeon/kabini_ce.bin"); -MODULE_FIRMWARE("radeon/kabini_rlc.bin"); -MODULE_FIRMWARE("radeon/kabini_mec.bin"); - -MODULE_FIRMWARE("radeon/mullins_pfp.bin"); -MODULE_FIRMWARE("radeon/mullins_me.bin"); -MODULE_FIRMWARE("radeon/mullins_ce.bin"); -MODULE_FIRMWARE("radeon/mullins_rlc.bin"); -MODULE_FIRMWARE("radeon/mullins_mec.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_pfp.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_me.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_ce.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_rlc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_mec.bin"); + +MODULE_FIRMWARE("amdgpu/hawaii_pfp.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_me.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_ce.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_rlc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_mec.bin"); + +MODULE_FIRMWARE("amdgpu/kaveri_pfp.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_me.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_ce.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_rlc.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_mec.bin"); +MODULE_FIRMWARE("amdgpu/kaveri_mec2.bin"); + +MODULE_FIRMWARE("amdgpu/kabini_pfp.bin"); +MODULE_FIRMWARE("amdgpu/kabini_me.bin"); +MODULE_FIRMWARE("amdgpu/kabini_ce.bin"); +MODULE_FIRMWARE("amdgpu/kabini_rlc.bin"); +MODULE_FIRMWARE("amdgpu/kabini_mec.bin"); + +MODULE_FIRMWARE("amdgpu/mullins_pfp.bin"); +MODULE_FIRMWARE("amdgpu/mullins_me.bin"); +MODULE_FIRMWARE("amdgpu/mullins_ce.bin"); +MODULE_FIRMWARE("amdgpu/mullins_rlc.bin"); +MODULE_FIRMWARE("amdgpu/mullins_mec.bin");  static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =  { @@ -925,7 +925,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)  	default: BUG();  	} -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);  	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -933,7 +933,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)  	if (err)  		goto out; -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);  	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -941,7 +941,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)  	if (err)  		goto out; -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);  	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -949,7 +949,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)  	if (err)  		goto out; -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);  	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);  	if (err)  		goto out; @@ -958,7 +958,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)  		goto out;  	if (adev->asic_type == CHIP_KAVERI) { -		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name); +		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);  		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);  		if (err)  			goto 
out; @@ -967,7 +967,7 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)  			goto out;  	} -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);  	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);  	if (err)  		goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 818874b13c99..5cd45210113f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -51,6 +51,8 @@  #include "smu/smu_7_1_3_d.h" +#include "ivsrcid/ivsrcid_vislands30.h" +  #define GFX8_NUM_GFX_RINGS     1  #define GFX8_MEC_HPD_SIZE 2048 @@ -704,6 +706,17 @@ static const u32 stoney_mgcg_cgcg_init[] =  	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,  }; + +static const char * const sq_edc_source_names[] = { +	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred", +	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch", +	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return", +	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR", +	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS", +	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS", +	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA", +}; +  static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);  static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);  static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); @@ -866,26 +879,32 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)  	struct amdgpu_device *adev = ring->adev;  	struct amdgpu_ib ib;  	struct dma_fence *f = NULL; -	uint32_t scratch; -	uint32_t tmp = 0; + +	unsigned int index; +	uint64_t gpu_addr; +	uint32_t tmp;  	long r; -	r = amdgpu_gfx_scratch_get(adev, &scratch); +	r = amdgpu_device_wb_get(adev, &index);  	if (r) { -		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); +		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);  		return r;  	} -	WREG32(scratch, 0xCAFEDEAD); + +	gpu_addr = adev->wb.gpu_addr + (index * 4); +	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);  	memset(&ib, 0, sizeof(ib)); -	r = amdgpu_ib_get(adev, NULL, 256, &ib); +	r = amdgpu_ib_get(adev, NULL, 16, &ib);  	if (r) {  		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);  		goto err1;  	} -	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); -	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); -	ib.ptr[2] = 0xDEADBEEF; -	ib.length_dw = 3; +	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); +	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; +	ib.ptr[2] = lower_32_bits(gpu_addr); +	ib.ptr[3] = upper_32_bits(gpu_addr); +	ib.ptr[4] = 0xDEADBEEF; +	ib.length_dw = 5;  	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);  	if (r) @@ -900,20 +919,21 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)  		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);  		goto err2;  	} -	tmp = RREG32(scratch); + +	tmp = adev->wb.wb[index];  	if (tmp == 0xDEADBEEF) {  		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);  		r = 0;  	} else { -		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", -			  scratch, tmp); +		DRM_ERROR("ib test on ring %d failed\n", ring->idx);  		r = -EINVAL;  	} +  err2:  	amdgpu_ib_free(adev, &ib, NULL);  	dma_fence_put(f);  err1: -	amdgpu_gfx_scratch_free(adev, scratch); +	amdgpu_device_wb_free(adev, index);  	return r;  } @@ -1999,6 +2019,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,  	return 0;  } +static void 
gfx_v8_0_sq_irq_work_func(struct work_struct *work); +  static int gfx_v8_0_sw_init(void *handle)  {  	int i, j, k, r, ring_id; @@ -2027,27 +2049,43 @@ static int gfx_v8_0_sw_init(void *handle)  	adev->gfx.mec.num_queue_per_pipe = 8;  	/* KIQ event */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq);  	if (r)  		return r;  	/* EOP Event */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);  	if (r)  		return r;  	/* Privileged reg */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,  			      &adev->gfx.priv_reg_irq);  	if (r)  		return r;  	/* Privileged inst */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,  			      &adev->gfx.priv_inst_irq);  	if (r)  		return r; +	/* Add CP EDC/ECC irq  */ +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, +			      &adev->gfx.cp_ecc_error_irq); +	if (r) +		return r; + +	/* SQ interrupts. */ +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, +			      &adev->gfx.sq_irq); +	if (r) { +		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); +		return r; +	} + +	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func); +  	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;  	gfx_v8_0_scratch_init(adev); @@ -5111,6 +5149,10 @@ static int gfx_v8_0_hw_fini(void *handle)  	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);  	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); +	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); + +	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); +  	/* disable KCQ to avoid CPC touch memory not valid anymore */  	for (i = 0; i < adev->gfx.num_compute_rings; i++)  		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); @@ -5542,9 +5584,19 @@ static int gfx_v8_0_late_init(void *handle)  	if (r)  		return r; -	amdgpu_device_ip_set_powergating_state(adev, -					       AMD_IP_BLOCK_TYPE_GFX, -					       AMD_PG_STATE_GATE); +	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); +	if (r) { +		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); +		return r; +	} + +	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); +	if (r) { +		DRM_ERROR( +			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", +			r); +		return r; +	}  	return 0;  } @@ -5552,14 +5604,12 @@ static int gfx_v8_0_late_init(void *handle)  static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,  						       bool enable)  { -	if ((adev->asic_type == CHIP_POLARIS11) || +	if (((adev->asic_type == CHIP_POLARIS11) ||  	    (adev->asic_type == CHIP_POLARIS12) || -	    (adev->asic_type == CHIP_VEGAM)) +	    (adev->asic_type == CHIP_VEGAM)) && +	    adev->powerplay.pp_funcs->set_powergating_by_smu)  		/* Send msg to SMU via Powerplay */ -		amdgpu_device_ip_set_powergating_state(adev, -						       AMD_IP_BLOCK_TYPE_SMC, -						       enable ? 
-						       AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); +		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);  	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);  } @@ -6787,6 +6837,77 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,  	return 0;  } +static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev, +					 struct amdgpu_irq_src *source, +					 unsigned int type, +					 enum amdgpu_interrupt_state state) +{ +	int enable_flag; + +	switch (state) { +	case AMDGPU_IRQ_STATE_DISABLE: +		enable_flag = 0; +		break; + +	case AMDGPU_IRQ_STATE_ENABLE: +		enable_flag = 1; +		break; + +	default: +		return -EINVAL; +	} + +	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); +	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag); +	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag); +	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag); +	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); +	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); +	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); +	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); +	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); +	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); +	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); +	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); +	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, +		     enable_flag); + +	return 0; +} + +static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev, +				     struct amdgpu_irq_src *source, +				     unsigned int type, +				     enum amdgpu_interrupt_state state) +{ +	int enable_flag; + +	switch (state) { +	case AMDGPU_IRQ_STATE_DISABLE: +		enable_flag = 1; +		break; + +	case AMDGPU_IRQ_STATE_ENABLE: +		enable_flag = 0; +		break; + +	default: +		return -EINVAL; +	} + +	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, +		     enable_flag); + +	return 0; +} +  static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,  			    struct amdgpu_irq_src *source,  			    struct amdgpu_iv_entry *entry) @@ -6837,6 +6958,114 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,  	return 0;  } +static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, +				     struct amdgpu_irq_src *source, +				     struct amdgpu_iv_entry *entry) +{ +	DRM_ERROR("CP EDC/ECC error detected."); +	return 0; +} + +static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) +{ +	u32 enc, se_id, sh_id, cu_id; +	char type[20]; +	int sq_edc_source = -1; + +	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING); +	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID); + +	switch (enc) { +		case 0: +			DRM_INFO("SQ general purpose intr detected:" +					"se_id %d, immed_overflow %d, host_reg_overflow %d," +					"host_cmd_overflow %d, cmd_timestamp %d," +					"reg_timestamp %d, thread_trace_buff_full %d," +					"wlt %d, thread_trace %d.\n", +					se_id, +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP), +					
REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE) +					); +			break; +		case 1: +		case 2: + +			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID); +			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID); + +			/* +			 * This function can be called either directly from ISR +			 * or from BH in which case we can access SQ_EDC_INFO +			 * instance +			 */ +			if (in_task()) { +				mutex_lock(&adev->grbm_idx_mutex); +				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); + +				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); + +				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +				mutex_unlock(&adev->grbm_idx_mutex); +			} + +			if (enc == 1) +				sprintf(type, "instruction intr"); +			else +				sprintf(type, "EDC/ECC error"); + +			DRM_INFO( +				"SQ %s detected: " +					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d " +					"trap %s, sq_ed_info.source %s.\n", +					type, se_id, sh_id, cu_id, +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID), +					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false", +					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable" +				); +			break; +		default: +			DRM_ERROR("SQ invalid encoding type\n."); +	} +} + +static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) +{ + +	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work); +	struct sq_work *sq_work = container_of(work, struct sq_work, work); + +	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data); +} + +static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, +			   struct amdgpu_irq_src *source, +			   struct amdgpu_iv_entry *entry) +{ +	unsigned ih_data = entry->src_data[0]; + +	/* +	 * Try to submit work so SQ_EDC_INFO can be accessed from +	 * BH. If previous work submission hasn't finished yet +	 * just print whatever info is possible directly from the ISR. 
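The comment above describes the pattern the handler just below implements: the interrupt handler itself cannot take grbm_idx_mutex to read SQ_EDC_INFO, so the full decode is pushed to a work item, and if that work item is still pending the handler degrades to printing what it already has. In generic form (hypothetical names; the driver's own sq_work plumbing is what the patch actually adds):

struct example_deferred_irq {
	struct work_struct work;	/* INIT_WORK(..., example_irq_bh) at init time */
	unsigned int data;
};

static void example_irq_bh(struct work_struct *work)
{
	struct example_deferred_irq *d =
		container_of(work, struct example_deferred_irq, work);

	/* process context: mutexes and register banking are allowed here */
	pr_info("deferred irq data 0x%x\n", d->data);
}

static void example_irq_top_half(struct example_deferred_irq *d, unsigned int data)
{
	if (work_pending(&d->work)) {
		pr_info("bottom half busy, raw data 0x%x\n", data);
	} else {
		d->data = data;
		schedule_work(&d->work);
	}
}
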
+	 */ +	if (work_pending(&adev->gfx.sq_work.work)) { +		gfx_v8_0_parse_sq_irq(adev, ih_data); +	} else { +		adev->gfx.sq_work.ih_data = ih_data; +		schedule_work(&adev->gfx.sq_work.work); +	} + +	return 0; +} +  static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,  					    struct amdgpu_irq_src *src,  					    unsigned int type, @@ -7037,6 +7266,16 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {  	.process = gfx_v8_0_kiq_irq,  }; +static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { +	.set = gfx_v8_0_set_cp_ecc_int_state, +	.process = gfx_v8_0_cp_ecc_error_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = { +	.set = gfx_v8_0_set_sq_int_state, +	.process = gfx_v8_0_sq_irq, +}; +  static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)  {  	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -7050,6 +7289,12 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)  	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;  	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; + +	adev->gfx.cp_ecc_error_irq.num_types = 1; +	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; + +	adev->gfx.sq_irq.num_types = 1; +	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;  }  static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a69153435ea7..ef00d14f8645 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -38,6 +38,8 @@  #include "clearstate_gfx9.h"  #include "v9_structs.h" +#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" +  #define GFX9_NUM_GFX_RINGS     1  #define GFX9_MEC_HPD_SIZE 2048  #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -102,11 +104,22 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =  {  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),  	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), -	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800) +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), +	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)  };  static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = @@ -648,7 +661,10 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)  		adev->firmware.fw_size +=  		
	ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); -		if (adev->gfx.rlc.is_rlc_v2_1) { +		if (adev->gfx.rlc.is_rlc_v2_1 && +		    adev->gfx.rlc.save_restore_list_cntl_size_bytes && +		    adev->gfx.rlc.save_restore_list_gpm_size_bytes && +		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {  			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];  			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;  			info->fw = adev->gfx.rlc_fw; @@ -943,6 +959,7 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)  		dst_ptr = adev->gfx.rlc.cs_ptr;  		gfx_v9_0_get_csb_buffer(adev, dst_ptr);  		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); +		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);  		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);  	} @@ -971,6 +988,39 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)  	return 0;  } +static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev) +{ +	int r; + +	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); +	if (unlikely(r != 0)) +		return r; + +	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, +			AMDGPU_GEM_DOMAIN_VRAM); +	if (!r) +		adev->gfx.rlc.clear_state_gpu_addr = +			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj); + +	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + +	return r; +} + +static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev) +{ +	int r; + +	if (!adev->gfx.rlc.clear_state_obj) +		return; + +	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true); +	if (likely(r == 0)) { +		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); +		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); +	} +} +  static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)  {  	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); @@ -1451,23 +1501,23 @@ static int gfx_v9_0_sw_init(void *handle)  	adev->gfx.mec.num_queue_per_pipe = 8;  	/* KIQ event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_IB2_INTERRUPT_PKT, &adev->gfx.kiq.irq);  	if (r)  		return r;  	/* EOP Event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq); +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);  	if (r)  		return r;  	/* Privileged reg */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184, +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,  			      &adev->gfx.priv_reg_irq);  	if (r)  		return r;  	/* Privileged inst */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185, +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,  			      &adev->gfx.priv_inst_irq);  	if (r)  		return r; @@ -2148,8 +2198,16 @@ static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *ad  static void gfx_v9_0_init_pg(struct amdgpu_device *adev)  { -	if (!adev->gfx.rlc.is_rlc_v2_1) -		return; +	gfx_v9_0_init_csb(adev); + +	/* +	 * Rlc save restore list is workable since v2_1. +	 * And it's needed by gfxoff feature. 
+	 */ +	if (adev->gfx.rlc.is_rlc_v2_1) { +		gfx_v9_1_init_rlc_save_restore_list(adev); +		gfx_v9_0_enable_save_restore_machine(adev); +	}  	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |  			      AMD_PG_SUPPORT_GFX_SMG | @@ -2157,10 +2215,6 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev)  			      AMD_PG_SUPPORT_CP |  			      AMD_PG_SUPPORT_GDS |  			      AMD_PG_SUPPORT_RLC_SMU_HS)) { -		gfx_v9_0_init_csb(adev); -		gfx_v9_1_init_rlc_save_restore_list(adev); -		gfx_v9_0_enable_save_restore_machine(adev); -  		WREG32(mmRLC_JUMP_TABLE_RESTORE,  		       adev->gfx.rlc.cp_table_gpu_addr >> 8);  		gfx_v9_0_init_gfx_power_gating(adev); @@ -2252,9 +2306,6 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)  	/* disable CG */  	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); -	/* disable PG */ -	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, 0); -  	gfx_v9_0_rlc_reset(adev);  	gfx_v9_0_init_pg(adev); @@ -3116,6 +3167,10 @@ static int gfx_v9_0_hw_init(void *handle)  	gfx_v9_0_gpu_init(adev); +	r = gfx_v9_0_csb_vram_pin(adev); +	if (r) +		return r; +  	r = gfx_v9_0_rlc_resume(adev);  	if (r)  		return r; @@ -3224,6 +3279,8 @@ static int gfx_v9_0_hw_fini(void *handle)  	gfx_v9_0_cp_enable(adev, false);  	gfx_v9_0_rlc_stop(adev); +	gfx_v9_0_csb_vram_unpin(adev); +  	return 0;  } @@ -3433,7 +3490,7 @@ static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)  		/* wait for RLC_SAFE_MODE */  		for (i = 0; i < adev->usec_timeout; i++) { -			if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) +			if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))  				break;  			udelay(1);  		} @@ -3510,8 +3567,11 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev  	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {  		/* 1 - RLC_CGTT_MGCG_OVERRIDE */  		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); -		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | -			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + +		if (adev->asic_type != CHIP_VEGA12) +			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; + +		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |  			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |  			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); @@ -3541,11 +3601,15 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev  	} else {  		/* 1 - MGCG_OVERRIDE */  		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); -		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK | -			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | + +		if (adev->asic_type != CHIP_VEGA12) +			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; + +		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |  			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |  			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |  			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); +  		if (def != data)  			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); @@ -3581,9 +3645,11 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,  		/* update CGCG and CGLS override bits */  		if (def != data)  			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); -		/* enable 3Dcgcg FSM(0x0020003f) */ + +		/* enable 3Dcgcg FSM(0x0000363f) */  		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); -		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + +		data = (0x36 
<< RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |  			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;  		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)  			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | @@ -3630,9 +3696,10 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev  		if (def != data)  			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); -		/* enable cgcg FSM(0x0020003F) */ +		/* enable cgcg FSM(0x0000363F) */  		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); -		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + +		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |  			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;  		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)  			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | @@ -3714,6 +3781,15 @@ static int gfx_v9_0_set_powergating_state(void *handle,  		/* update mgcg state */  		gfx_v9_0_update_gfx_mg_power_gating(adev, enable); + +		/* set gfx off through smu */ +		if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) +			amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); +		break; +	case CHIP_VEGA12: +		/* set gfx off through smu */ +		if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) +			amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true);  		break;  	default:  		break; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 79f9ac29019b..75317f283c69 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -41,11 +41,11 @@ static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);  static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);  static int gmc_v6_0_wait_for_idle(void *handle); -MODULE_FIRMWARE("radeon/tahiti_mc.bin"); -MODULE_FIRMWARE("radeon/pitcairn_mc.bin"); -MODULE_FIRMWARE("radeon/verde_mc.bin"); -MODULE_FIRMWARE("radeon/oland_mc.bin"); -MODULE_FIRMWARE("radeon/si58_mc.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_mc.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin"); +MODULE_FIRMWARE("amdgpu/verde_mc.bin"); +MODULE_FIRMWARE("amdgpu/oland_mc.bin"); +MODULE_FIRMWARE("amdgpu/si58_mc.bin");  #define MC_SEQ_MISC0__MT__MASK   0xf0000000  #define MC_SEQ_MISC0__MT__GDDR1  0x10000000 @@ -134,9 +134,9 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)  		is_58_fw = true;  	if (is_58_fw) -		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin"); +		snprintf(fw_name, sizeof(fw_name), "amdgpu/si58_mc.bin");  	else -		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); +		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);  	err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);  	if (err)  		goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 7147bfe25a23..36dc367c4b45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -28,6 +28,7 @@  #include "cik.h"  #include "gmc_v7_0.h"  #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h"  #include "bif/bif_4_1_d.h"  #include "bif/bif_4_1_sh_mask.h" @@ -43,12 +44,14 @@  #include "amdgpu_atombios.h" +#include "ivsrcid/ivsrcid_vislands30.h" +  static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);  static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);  static int gmc_v7_0_wait_for_idle(void *handle); -MODULE_FIRMWARE("radeon/bonaire_mc.bin"); 
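For reference, the CGCG/CGLS FSM reprogramming in the gfx_v9_0 hunks above only lowers the idle threshold; the enable bits are unchanged. A rough decode of the two values (the shift/bit layout is inferred from the 0x0020003F -> 0x0000363F comments in the hunks, not quoted from the register headers):

	/* rough decode, assuming THRESHOLD shift 8, REP_COMPANSAT_DELAY shift 2,
	 * CGLS_EN bit 1, CGCG_EN bit 0 as implied by the comment values */
	val_old = (0x2000 << 8) | (0xF << 2) | 0x2 | 0x1;	/* 0x0020003F */
	val_new = (0x36   << 8) | (0xF << 2) | 0x2 | 0x1;	/* 0x0000363F */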
-MODULE_FIRMWARE("radeon/hawaii_mc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_mc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_mc.bin");  MODULE_FIRMWARE("amdgpu/topaz_mc.bin");  static const u32 golden_settings_iceland_a11[] = @@ -147,10 +150,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)  	default: BUG();  	} -	if (adev->asic_type == CHIP_TOPAZ) -		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); -	else -		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);  	err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);  	if (err) @@ -999,11 +999,11 @@ static int gmc_v7_0_sw_init(void *handle)  		adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);  	} -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);  	if (r)  		return r; -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);  	if (r)  		return r; @@ -1079,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)  		adev->vm_manager.vram_base_offset = 0;  	} +	adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), +					GFP_KERNEL); +	if (!adev->gmc.vm_fault_info) +		return -ENOMEM; +	atomic_set(&adev->gmc.vm_fault_info_updated, 0); +  	return 0;  } @@ -1088,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)  	amdgpu_gem_force_release(adev);  	amdgpu_vm_manager_fini(adev); +	kfree(adev->gmc.vm_fault_info);  	gmc_v7_0_gart_fini(adev);  	amdgpu_bo_fini(adev);  	release_firmware(adev->gmc.fw); @@ -1277,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,  				      struct amdgpu_irq_src *source,  				      struct amdgpu_iv_entry *entry)  { -	u32 addr, status, mc_client; +	u32 addr, status, mc_client, vmid;  	addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);  	status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); @@ -1302,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,  					 entry->pasid);  	} +	vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, +			     VMID); +	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) +		&& !atomic_read(&adev->gmc.vm_fault_info_updated)) { +		struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; +		u32 protections = REG_GET_FIELD(status, +					VM_CONTEXT1_PROTECTION_FAULT_STATUS, +					PROTECTIONS); + +		info->vmid = vmid; +		info->mc_id = REG_GET_FIELD(status, +					    VM_CONTEXT1_PROTECTION_FAULT_STATUS, +					    MEMORY_CLIENT_ID); +		info->status = status; +		info->page_addr = addr; +		info->prot_valid = protections & 0x7 ? true : false; +		info->prot_read = protections & 0x8 ? true : false; +		info->prot_write = protections & 0x10 ? true : false; +		info->prot_exec = protections & 0x20 ? 
true : false; +		mb(); +		atomic_set(&adev->gmc.vm_fault_info_updated, 1); +	} +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1edbe6b477b5..70fc97b59b4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -26,6 +26,7 @@  #include "amdgpu.h"  #include "gmc_v8_0.h"  #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h"  #include "gmc/gmc_8_1_d.h"  #include "gmc/gmc_8_1_sh_mask.h" @@ -44,6 +45,7 @@  #include "amdgpu_atombios.h" +#include "ivsrcid/ivsrcid_vislands30.h"  static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);  static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1101,11 +1103,11 @@ static int gmc_v8_0_sw_init(void *handle)  		adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp);  	} -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault);  	if (r)  		return r; -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault);  	if (r)  		return r; @@ -1181,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)  		adev->vm_manager.vram_base_offset = 0;  	} +	adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), +					GFP_KERNEL); +	if (!adev->gmc.vm_fault_info) +		return -ENOMEM; +	atomic_set(&adev->gmc.vm_fault_info_updated, 0); +  	return 0;  } @@ -1190,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)  	amdgpu_gem_force_release(adev);  	amdgpu_vm_manager_fini(adev); +	kfree(adev->gmc.vm_fault_info);  	gmc_v8_0_gart_fini(adev);  	amdgpu_bo_fini(adev);  	release_firmware(adev->gmc.fw); @@ -1425,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,  				      struct amdgpu_irq_src *source,  				      struct amdgpu_iv_entry *entry)  { -	u32 addr, status, mc_client; +	u32 addr, status, mc_client, vmid;  	if (amdgpu_sriov_vf(adev)) {  		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", @@ -1447,8 +1456,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,  		gmc_v8_0_set_fault_enable_default(adev, false);  	if (printk_ratelimit()) { -		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", -			entry->src_id, entry->src_data[0]); +		struct amdgpu_task_info task_info = { 0 }; + +		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + +		dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n", +			entry->src_id, entry->src_data[0], task_info.process_name, +			task_info.tgid, task_info.task_name, task_info.pid);  		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",  			addr);  		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", @@ -1457,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,  					 entry->pasid);  	} +	vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, +			     VMID); +	if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) +		&& !atomic_read(&adev->gmc.vm_fault_info_updated)) { +		struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; +		u32 protections = REG_GET_FIELD(status, +					VM_CONTEXT1_PROTECTION_FAULT_STATUS, +					PROTECTIONS); + +		info->vmid = vmid; +		info->mc_id = REG_GET_FIELD(status, +					    VM_CONTEXT1_PROTECTION_FAULT_STATUS, +					    MEMORY_CLIENT_ID); +		
info->status = status; +		info->page_addr = addr; +		info->prot_valid = protections & 0x7 ? true : false; +		info->prot_read = protections & 0x8 ? true : false; +		info->prot_write = protections & 0x10 ? true : false; +		info->prot_exec = protections & 0x20 ? true : false; +		mb(); +		atomic_set(&adev->gmc.vm_fault_info_updated, 1); +	} +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3c0a85d4e4ab..399a5db27649 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -43,6 +43,8 @@  #include "gfxhub_v1_0.h"  #include "mmhub_v1_0.h" +#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" +  /* add these here since we already include dce12 headers and these are for DCN */  #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d  #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2 @@ -257,12 +259,17 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,  	}  	if (printk_ratelimit()) { +		struct amdgpu_task_info task_info = { 0 }; + +		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); +  		dev_err(adev->dev, -			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", +			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n",  			entry->vmid_src ? "mmhub" : "gfxhub",  			entry->src_id, entry->ring_id, entry->vmid, -			entry->pasid); -		dev_err(adev->dev, "  at page 0x%016llx from %d\n", +			entry->pasid, task_info.process_name, task_info.tgid, +			task_info.task_name, task_info.pid); +		dev_err(adev->dev, "  at address 0x%016llx from %d\n",  			addr, entry->client_id);  		if (!amdgpu_sriov_vf(adev))  			dev_err(adev->dev, @@ -872,9 +879,9 @@ static int gmc_v9_0_sw_init(void *handle)  	}  	/* This interrupt is VMC page fault.*/ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0, +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,  				&adev->gmc.vm_fault); -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0, +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,  				&adev->gmc.vm_fault);  	if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 7a1e77c93bf1..3f57f6463dc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -1921,7 +1921,7 @@ static int kv_dpm_set_power_state(void *handle)  	int ret;  	if (pi->bapm_enable) { -		ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.dpm.ac_power); +		ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.ac_power);  		if (ret) {  			DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n");  			return ret; @@ -3306,6 +3306,19 @@ static int kv_dpm_read_sensor(void *handle, int idx,  	}  } +static int kv_set_powergating_by_smu(void *handle, +				uint32_t block_type, bool gate) +{ +	switch (block_type) { +	case AMD_IP_BLOCK_TYPE_UVD: +		kv_dpm_powergate_uvd(handle, gate); +		break; +	default: +		break; +	} +	return 0; +} +  static const struct amd_ip_funcs kv_dpm_ip_funcs = {  	.name = "kv_dpm",  	.early_init = kv_dpm_early_init, @@ -3342,7 +3355,7 @@ static const struct amd_pm_funcs kv_dpm_funcs = {  	.print_power_state = &kv_dpm_print_power_state,  	.debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level,  	.force_performance_level = &kv_dpm_force_performance_level, -	.powergate_uvd = &kv_dpm_powergate_uvd, +	
.set_powergating_by_smu = kv_set_powergating_by_smu,  	.enable_bapm = &kv_dpm_enable_bapm,  	.get_vce_clock_state = amdgpu_get_vce_clock_state,  	.check_state_equal = kv_check_state_equal, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 3d53c4413f13..e70a0d4d6db4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -471,8 +471,8 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,  						RENG_EXECUTE_ON_REG_UPDATE, 1);  		WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); -		if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu) -			amdgpu_dpm_set_mmhub_powergating_by_smu(adev); +		if (adev->powerplay.pp_funcs->set_powergating_by_smu) +			amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);  	} else {  		pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 0ff136d02d9b..02be34e72ed9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -88,6 +88,9 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *  	case AMDGPU_UCODE_ID_VCE:  		*type = GFX_FW_TYPE_VCE;  		break; +	case AMDGPU_UCODE_ID_VCN: +		*type = GFX_FW_TYPE_VCN; +		break;  	case AMDGPU_UCODE_ID_MAXIMUM:  	default:  		return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index c7190c39c4f5..15ae4bc9c072 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -44,6 +44,8 @@  #include "iceland_sdma_pkt_open.h" +#include "ivsrcid/ivsrcid_vislands30.h" +  static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);  static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);  static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -202,8 +204,7 @@ static uint64_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)  static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; -	u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2; +	u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;  	return wptr;  } @@ -218,9 +219,8 @@ static uint64_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)  static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; -	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); +	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);  }  static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -273,7 +273,7 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)  {  	u32 ref_and_mask = 0; -	if (ring == &ring->adev->sdma.instance[0].ring) +	if (ring->me == 0)  		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);  	else  		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1); @@ -898,7 +898,7 @@ static int sdma_v2_4_sw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	/* SDMA trap event */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,  			      &adev->sdma.trap_irq);  	if (r)  		return r; @@ -910,7 +910,7 @@ static int sdma_v2_4_sw_init(void *handle)  		return r;  	/* SDMA Privileged inst */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,  			      &adev->sdma.illegal_inst_irq);  	if (r)  		return r; @@ -1213,8 +1213,10 @@ static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)  {  	int i; -	for (i = 0; i < adev->sdma.num_instances; i++) +	for (i = 0; i < adev->sdma.num_instances; i++) {  		adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs; +		adev->sdma.instance[i].ring.me = i; +	}  }  static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index aa9ab299fd32..1e07ff274d73 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -44,6 +44,8 @@  #include "tonga_sdma_pkt_open.h" +#include "ivsrcid/ivsrcid_vislands30.h" +  static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev);  static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev);  static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -365,9 +367,7 @@ static uint64_t sdma_v3_0_ring_get_wptr(struct amdgpu_ring *ring)  		/* XXX check if swapping is necessary on BE */  		wptr = ring->adev->wb.wb[ring->wptr_offs] >> 2;  	} else { -		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; - -		wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2; +		wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;  	}  	return wptr; @@ -394,9 +394,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)  		WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));  	} else { -		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; - -		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], lower_32_bits(ring->wptr) << 2); +		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);  	}  } @@ -450,7 +448,7 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)  {  	u32 ref_and_mask = 0; -	if (ring == &ring->adev->sdma.instance[0].ring) +	if (ring->me == 0)  		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);  	else  		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1); @@ -1179,7 +1177,7 @@ static int sdma_v3_0_sw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	/* SDMA trap event */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP,  			      &adev->sdma.trap_irq);  	if (r)  		return r; @@ -1191,7 +1189,7 @@ static int sdma_v3_0_sw_init(void *handle)  		return r;  	/* SDMA Privileged inst */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE,  			      &adev->sdma.illegal_inst_irq);  	if (r)  		return r; @@ -1655,8 +1653,10 @@ static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)  {  	int i; -	for (i = 0; i < adev->sdma.num_instances; i++) +	for (i = 0; i < adev->sdma.num_instances; i++) {  		adev->sdma.instance[i].ring.funcs = &sdma_v3_0_ring_funcs; +		adev->sdma.instance[i].ring.me = i; +	}  }  static const struct amdgpu_irq_src_funcs sdma_v3_0_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ca53b3fba422..e7ca4623cfb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -38,6 +38,9 @@  #include "soc15.h"  #include "vega10_sdma_pkt_open.h" +#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h" +#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h" +  MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");  MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");  MODULE_FIRMWARE("amdgpu/vega12_sdma.bin"); @@ -296,13 +299,12 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)  		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);  	} else {  		u32 lowbit, highbit; -		int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; -		lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; -		highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; +		lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; +		highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;  		DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", -				me, highbit, lowbit); +				ring->me, highbit, lowbit);  		wptr = highbit;  		wptr = wptr << 32;  		wptr |= lowbit; @@ -339,17 +341,15 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)  				ring->doorbell_index, ring->wptr << 2);  		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);  	} else { -		int me = (ring == &ring->adev->sdma.instance[0].ring) ? 
0 : 1; -  		DRM_DEBUG("Not using doorbell -- "  				"mmSDMA%i_GFX_RB_WPTR == 0x%08x "  				"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", -				me, +				ring->me,  				lower_32_bits(ring->wptr << 2), -				me, +				ring->me,  				upper_32_bits(ring->wptr << 2)); -		WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); -		WREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); +		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); +		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));  	}  } @@ -430,7 +430,7 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)  	u32 ref_and_mask = 0;  	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; -	if (ring == &ring->adev->sdma.instance[0].ring) +	if (ring->me == 0)  		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0;  	else  		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; @@ -1228,13 +1228,13 @@ static int sdma_v4_0_sw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	/* SDMA trap event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224, +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP,  			      &adev->sdma.trap_irq);  	if (r)  		return r;  	/* SDMA trap event */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224, +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP,  			      &adev->sdma.trap_irq);  	if (r)  		return r; @@ -1651,8 +1651,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)  {  	int i; -	for (i = 0; i < adev->sdma.num_instances; i++) +	for (i = 0; i < adev->sdma.num_instances; i++) {  		adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; +		adev->sdma.instance[i].ring.me = i; +	}  }  static const struct amdgpu_irq_src_funcs sdma_v4_0_trap_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 5c97a3671726..db327b412562 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -56,16 +56,16 @@  #define BIOS_SCRATCH_4                                    0x5cd -MODULE_FIRMWARE("radeon/tahiti_smc.bin"); -MODULE_FIRMWARE("radeon/pitcairn_smc.bin"); -MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin"); -MODULE_FIRMWARE("radeon/verde_smc.bin"); -MODULE_FIRMWARE("radeon/verde_k_smc.bin"); -MODULE_FIRMWARE("radeon/oland_smc.bin"); -MODULE_FIRMWARE("radeon/oland_k_smc.bin"); -MODULE_FIRMWARE("radeon/hainan_smc.bin"); -MODULE_FIRMWARE("radeon/hainan_k_smc.bin"); -MODULE_FIRMWARE("radeon/banks_k_2_smc.bin"); +MODULE_FIRMWARE("amdgpu/tahiti_smc.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_smc.bin"); +MODULE_FIRMWARE("amdgpu/pitcairn_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/verde_smc.bin"); +MODULE_FIRMWARE("amdgpu/verde_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/oland_smc.bin"); +MODULE_FIRMWARE("amdgpu/oland_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/hainan_smc.bin"); +MODULE_FIRMWARE("amdgpu/hainan_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/banks_k_2_smc.bin");  static const struct amd_pm_funcs si_dpm_funcs; @@ -3480,7 +3480,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,  		disable_sclk_switching = true;  	} -	if (adev->pm.dpm.ac_power) +	if (adev->pm.ac_power)  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac;  	else  		max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ 
-3489,7 +3489,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,  		if (ps->performance_levels[i].vddc > ps->performance_levels[i+1].vddc)  			ps->performance_levels[i].vddc = ps->performance_levels[i+1].vddc;  	} -	if (adev->pm.dpm.ac_power == false) { +	if (adev->pm.ac_power == false) {  		for (i = 0; i < ps->performance_level_count; i++) {  			if (ps->performance_levels[i].mclk > max_limits->mclk)  				ps->performance_levels[i].mclk = max_limits->mclk; @@ -7318,8 +7318,7 @@ static int si_dpm_init(struct amdgpu_device *adev)  	pi = &eg_pi->rv7xx;  	si_pi->sys_pcie_mask = -		(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >> -		CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT; +		adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK;  	si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;  	si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev); @@ -7667,7 +7666,7 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev)  	default: BUG();  	} -	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name); +	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_smc.bin", chip_name);  	err = request_firmware(&adev->pm.fw, fw_name, adev->dev);  	if (err)  		goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 8dc29107228f..edfe50821cd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -53,6 +53,29 @@  #define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) +#define	PACKETJ_CONDITION_CHECK0	0 +#define	PACKETJ_CONDITION_CHECK1	1 +#define	PACKETJ_CONDITION_CHECK2	2 +#define	PACKETJ_CONDITION_CHECK3	3 +#define	PACKETJ_CONDITION_CHECK4	4 +#define	PACKETJ_CONDITION_CHECK5	5 +#define	PACKETJ_CONDITION_CHECK6	6 +#define	PACKETJ_CONDITION_CHECK7	7 + +#define	PACKETJ_TYPE0	0 +#define	PACKETJ_TYPE1	1 +#define	PACKETJ_TYPE2	2 +#define	PACKETJ_TYPE3	3 +#define	PACKETJ_TYPE4	4 +#define	PACKETJ_TYPE5	5 +#define	PACKETJ_TYPE6	6 +#define	PACKETJ_TYPE7	7 + +#define PACKETJ(reg, r, cond, type)	((reg & 0x3FFFF) |			\ +			 ((r & 0x3F) << 18) |			\ +			 ((cond & 0xF) << 24) |				\ +			 ((type & 0xF) << 28)) +  /* Packet 3 types */  #define	PACKET3_NOP					0x10  #define	PACKET3_SET_BASE				0x11 diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 6fed3d7797a8..8a926d1df939 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -123,6 +123,10 @@ static int uvd_v4_2_sw_init(void *handle)  	ring = &adev->uvd.inst->ring;  	sprintf(ring->name, "uvd");  	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); +	if (r) +		return r; + +	r = amdgpu_uvd_entity_init(adev);  	return r;  } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 341ee6d55ce8..50248059412e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -35,6 +35,7 @@  #include "vi.h"  #include "smu/smu_7_1_2_d.h"  #include "smu/smu_7_1_2_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h"  static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);  static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev); @@ -104,7 +105,7 @@ static int uvd_v5_0_sw_init(void *handle)  	int r;  	/* UVD TRAP */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);  	if (r)  		return r; @@ -119,6 +120,10 @@ static int 
uvd_v5_0_sw_init(void *handle)  	ring = &adev->uvd.inst->ring;  	sprintf(ring->name, "uvd");  	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); +	if (r) +		return r; + +	r = amdgpu_uvd_entity_init(adev);  	return r;  } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index bfddf97dd13e..6ae82cc2e55e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -36,6 +36,7 @@  #include "bif/bif_5_1_d.h"  #include "gmc/gmc_8_1_d.h"  #include "vi.h" +#include "ivsrcid/ivsrcid_vislands30.h"  /* Polaris10/11/12 firmware version */  #define FW_1_130_16 ((1 << 24) | (130 << 16) | (16 << 8)) @@ -247,12 +248,10 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -	job->fence = dma_fence_get(f); +	r = amdgpu_job_submit_direct(job, ring, &f);  	if (r)  		goto err; -	amdgpu_job_free(job);  	if (fence)  		*fence = dma_fence_get(f);  	dma_fence_put(f); @@ -311,19 +310,13 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	if (direct) { -		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -		job->fence = dma_fence_get(f); -		if (r) -			goto err; - -		amdgpu_job_free(job); -	} else { -		r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, +	if (direct) +		r = amdgpu_job_submit_direct(job, ring, &f); +	else +		r = amdgpu_job_submit(job, &ring->adev->vce.entity,  				      AMDGPU_FENCE_OWNER_UNDEFINED, &f); -		if (r) -			goto err; -	} +	if (r) +		goto err;  	if (fence)  		*fence = dma_fence_get(f); @@ -400,14 +393,14 @@ static int uvd_v6_0_sw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	/* UVD TRAP */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq);  	if (r)  		return r;  	/* UVD ENC TRAP */  	if (uvd_v6_0_enc_support(adev)) {  		for (i = 0; i < adev->uvd.num_enc_rings; ++i) { -			r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.inst->irq); +			r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq);  			if (r)  				return r;  		} @@ -425,16 +418,6 @@ static int uvd_v6_0_sw_init(void *handle)  		adev->uvd.num_enc_rings = 0;  		DRM_INFO("UVD ENC is disabled\n"); -	} else { -		struct drm_sched_rq *rq; -		ring = &adev->uvd.inst->ring_enc[0]; -		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; -		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc, -					  rq, NULL); -		if (r) { -			DRM_ERROR("Failed setting up UVD ENC run queue.\n"); -			return r; -		}  	}  	r = amdgpu_uvd_resume(adev); @@ -457,6 +440,8 @@ static int uvd_v6_0_sw_init(void *handle)  		}  	} +	r = amdgpu_uvd_entity_init(adev); +  	return r;  } @@ -470,8 +455,6 @@ static int uvd_v6_0_sw_fini(void *handle)  		return r;  	if (uvd_v6_0_enc_support(adev)) { -		drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc); -  		for (i = 0; i < adev->uvd.num_enc_rings; ++i)  			amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);  	} @@ -1569,7 +1552,6 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {  static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {  	.type = 
AMDGPU_RING_TYPE_UVD,  	.align_mask = 0xf, -	.nop = PACKET0(mmUVD_NO_OP, 0),  	.support_64bit_ptrs = false,  	.get_rptr = uvd_v6_0_ring_get_rptr,  	.get_wptr = uvd_v6_0_ring_get_wptr, @@ -1587,7 +1569,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {  	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,  	.test_ring = uvd_v6_0_ring_test_ring,  	.test_ib = amdgpu_uvd_ring_test_ib, -	.insert_nop = amdgpu_ring_insert_nop, +	.insert_nop = uvd_v6_0_ring_insert_nop,  	.pad_ib = amdgpu_ring_generic_pad_ib,  	.begin_use = amdgpu_uvd_ring_begin_use,  	.end_use = amdgpu_uvd_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 57d32f21b3a6..9b7f8469bc5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -39,6 +39,13 @@  #include "hdp/hdp_4_0_offset.h"  #include "mmhub/mmhub_1_0_offset.h"  #include "mmhub/mmhub_1_0_sh_mask.h" +#include "ivsrcid/uvd/irqsrcs_uvd_7_0.h" + +#define mmUVD_PG0_CC_UVD_HARVESTING                                                                    0x00c7 +#define mmUVD_PG0_CC_UVD_HARVESTING_BASE_IDX                                                           1 +//UVD_PG0_CC_UVD_HARVESTING +#define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE__SHIFT                                                         0x1 +#define UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK                                                           0x00000002L  #define UVD7_MAX_HW_INSTANCES_VEGA20			2 @@ -249,12 +256,10 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -	job->fence = dma_fence_get(f); +	r = amdgpu_job_submit_direct(job, ring, &f);  	if (r)  		goto err; -	amdgpu_job_free(job);  	if (fence)  		*fence = dma_fence_get(f);  	dma_fence_put(f); @@ -312,19 +317,13 @@ int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,  	for (i = ib->length_dw; i < ib_size_dw; ++i)  		ib->ptr[i] = 0x0; -	if (direct) { -		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); -		job->fence = dma_fence_get(f); -		if (r) -			goto err; - -		amdgpu_job_free(job); -	} else { -		r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, +	if (direct) +		r = amdgpu_job_submit_direct(job, ring, &f); +	else +		r = amdgpu_job_submit(job, &ring->adev->vce.entity,  				      AMDGPU_FENCE_OWNER_UNDEFINED, &f); -		if (r) -			goto err; -	} +	if (r) +		goto err;  	if (fence)  		*fence = dma_fence_get(f); @@ -377,10 +376,25 @@ error:  static int uvd_v7_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	if (adev->asic_type == CHIP_VEGA20) + +	if (adev->asic_type == CHIP_VEGA20) { +		u32 harvest; +		int i; +  		adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20; -	else +		for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +			harvest = RREG32_SOC15(UVD, i, mmUVD_PG0_CC_UVD_HARVESTING); +			if (harvest & UVD_PG0_CC_UVD_HARVESTING__UVD_DISABLE_MASK) { +				adev->uvd.harvest_config |= 1 << i; +			} +		} +		if (adev->uvd.harvest_config == (AMDGPU_UVD_HARVEST_UVD0 | +						 AMDGPU_UVD_HARVEST_UVD1)) +			/* both instances are harvested, disable the block */ +			return -ENOENT; +	} else {  		adev->uvd.num_uvd_inst = 1; +	}  	if (amdgpu_sriov_vf(adev))  		adev->uvd.num_enc_rings = 1; @@ -396,19 +410,21 @@ static int uvd_v7_0_early_init(void *handle)  static int uvd_v7_0_sw_init(void *handle)  {  	struct amdgpu_ring *ring; -	struct 
drm_sched_rq *rq; +  	int i, j, r;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	for (j = 0; j < adev->uvd.num_uvd_inst; j++) { +		if (adev->uvd.harvest_config & (1 << j)) +			continue;  		/* UVD TRAP */ -		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124, &adev->uvd.inst[j].irq); +		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], UVD_7_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->uvd.inst[j].irq);  		if (r)  			return r;  		/* UVD ENC TRAP */  		for (i = 0; i < adev->uvd.num_enc_rings; ++i) { -			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + 119, &adev->uvd.inst[j].irq); +			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + UVD_7_0__SRCID__UVD_ENC_GEN_PURP, &adev->uvd.inst[j].irq);  			if (r)  				return r;  		} @@ -428,22 +444,13 @@ static int uvd_v7_0_sw_init(void *handle)  		DRM_INFO("PSP loading UVD firmware\n");  	} -	for (j = 0; j < adev->uvd.num_uvd_inst; j++) { -		ring = &adev->uvd.inst[j].ring_enc[0]; -		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; -		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc, -					  rq, NULL); -		if (r) { -			DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j); -			return r; -		} -	} -  	r = amdgpu_uvd_resume(adev);  	if (r)  		return r;  	for (j = 0; j < adev->uvd.num_uvd_inst; j++) { +		if (adev->uvd.harvest_config & (1 << j)) +			continue;  		if (!amdgpu_sriov_vf(adev)) {  			ring = &adev->uvd.inst[j].ring;  			sprintf(ring->name, "uvd<%d>", j); @@ -472,6 +479,10 @@ static int uvd_v7_0_sw_init(void *handle)  		}  	} +	r = amdgpu_uvd_entity_init(adev); +	if (r) +		return r; +  	r = amdgpu_virt_alloc_mm_table(adev);  	if (r)  		return r; @@ -491,8 +502,8 @@ static int uvd_v7_0_sw_fini(void *handle)  		return r;  	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { -		drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc); - +		if (adev->uvd.harvest_config & (1 << j)) +			continue;  		for (i = 0; i < adev->uvd.num_enc_rings; ++i)  			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);  	} @@ -521,6 +532,8 @@ static int uvd_v7_0_hw_init(void *handle)  		goto done;  	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { +		if (adev->uvd.harvest_config & (1 << j)) +			continue;  		ring = &adev->uvd.inst[j].ring;  		if (!amdgpu_sriov_vf(adev)) { @@ -600,8 +613,11 @@ static int uvd_v7_0_hw_fini(void *handle)  		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");  	} -	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) +	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		adev->uvd.inst[i].ring.ready = false; +	}  	return 0;  } @@ -644,6 +660,8 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev)  	int i;  	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {  			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,  				lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); @@ -716,6 +734,8 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,  	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);  	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);  		adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;  		adev->uvd.inst[i].ring_enc[0].wptr = 0; @@ -772,6 +792,8 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device 
*adev)  		init_table += header->uvd_table_offset;  		for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { +			if (adev->uvd.harvest_config & (1 << i)) +				continue;  			ring = &adev->uvd.inst[i].ring;  			ring->wptr = 0;  			size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); @@ -911,6 +933,8 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)  	int i, j, k, r;  	for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { +		if (adev->uvd.harvest_config & (1 << k)) +			continue;  		/* disable DPG */  		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,  				~UVD_POWER_STATUS__UVD_PG_MODE_MASK); @@ -923,6 +947,8 @@ static int uvd_v7_0_start(struct amdgpu_device *adev)  	uvd_v7_0_mc_resume(adev);  	for (k = 0; k < adev->uvd.num_uvd_inst; ++k) { +		if (adev->uvd.harvest_config & (1 << k)) +			continue;  		ring = &adev->uvd.inst[k].ring;  		/* disable clock gating */  		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0, @@ -1090,6 +1116,8 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev)  	uint8_t i = 0;  	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		/* force RBC into idle state */  		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101); @@ -1227,6 +1255,34 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)  }  /** + * uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission. + * + * @p: the CS parser with the IBs + * @ib_idx: which IB to patch + * + */ +static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, +					   uint32_t ib_idx) +{ +	struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; +	unsigned i; + +	/* No patching necessary for the first instance */ +	if (!p->ring->me) +		return 0; + +	for (i = 0; i < ib->length_dw; i += 2) { +		uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); + +		reg -= p->adev->reg_offset[UVD_HWIP][0][1]; +		reg += p->adev->reg_offset[UVD_HWIP][1][1]; + +		amdgpu_set_ib_value(p, ib_idx, i, reg); +	} +	return 0; +} + +/**   * uvd_v7_0_ring_emit_ib - execute indirect buffer   *   * @ring: amdgpu_ring pointer @@ -1718,6 +1774,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {  	.get_rptr = uvd_v7_0_ring_get_rptr,  	.get_wptr = uvd_v7_0_ring_get_wptr,  	.set_wptr = uvd_v7_0_ring_set_wptr, +	.patch_cs_in_place = uvd_v7_0_ring_patch_cs_in_place,  	.emit_frame_size =  		6 + /* hdp invalidate */  		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + @@ -1777,6 +1834,8 @@ static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)  	int i;  	for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;  		adev->uvd.inst[i].ring.me = i;  		DRM_INFO("UVD(%d) is enabled in VM mode\n", i); @@ -1788,6 +1847,8 @@ static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)  	int i, j;  	for (j = 0; j < adev->uvd.num_uvd_inst; j++) { +		if (adev->uvd.harvest_config & (1 << j)) +			continue;  		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {  			adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;  			adev->uvd.inst[j].ring_enc[i].me = j; @@ -1807,6 +1868,8 @@ static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)  	int i;  	for (i = 0; i < adev->uvd.num_uvd_inst; i++) { +		if (adev->uvd.harvest_config & (1 << i)) +			continue;  		adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;  		adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 
47f70827195b..7eaa54ba016b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -56,7 +56,7 @@ static uint64_t vce_v2_0_ring_get_rptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		return RREG32(mmVCE_RB_RPTR);  	else  		return RREG32(mmVCE_RB_RPTR2); @@ -73,7 +73,7 @@ static uint64_t vce_v2_0_ring_get_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		return RREG32(mmVCE_RB_WPTR);  	else  		return RREG32(mmVCE_RB_WPTR2); @@ -90,7 +90,7 @@ static void vce_v2_0_ring_set_wptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr));  	else  		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); @@ -439,6 +439,8 @@ static int vce_v2_0_sw_init(void *handle)  			return r;  	} +	r = amdgpu_vce_entity_init(adev); +  	return r;  } @@ -627,8 +629,10 @@ static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)  {  	int i; -	for (i = 0; i < adev->vce.num_rings; i++) +	for (i = 0; i < adev->vce.num_rings; i++) {  		adev->vce.ring[i].funcs = &vce_v2_0_ring_funcs; +		adev->vce.ring[i].me = i; +	}  }  static const struct amdgpu_irq_src_funcs vce_v2_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index a71b97519cc0..c8390f9adfd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -39,6 +39,7 @@  #include "smu/smu_7_1_2_sh_mask.h"  #include "gca/gfx_8_0_d.h"  #include "gca/gfx_8_0_sh_mask.h" +#include "ivsrcid/ivsrcid_vislands30.h"  #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04 @@ -86,9 +87,9 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring)  	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)  		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		v = RREG32(mmVCE_RB_RPTR); -	else if (ring == &adev->vce.ring[1]) +	else if (ring->me == 1)  		v = RREG32(mmVCE_RB_RPTR2);  	else  		v = RREG32(mmVCE_RB_RPTR3); @@ -118,9 +119,9 @@ static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring)  	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)  		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		v = RREG32(mmVCE_RB_WPTR); -	else if (ring == &adev->vce.ring[1]) +	else if (ring->me == 1)  		v = RREG32(mmVCE_RB_WPTR2);  	else  		v = RREG32(mmVCE_RB_WPTR3); @@ -149,9 +150,9 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring)  	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)  		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); -	else if (ring == &adev->vce.ring[1]) +	else if (ring->me == 1)  		WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr));  	else  		WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); @@ -422,7 +423,7 @@ static int vce_v3_0_sw_init(void *handle)  	int r, i;  	/* VCE */ -	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq); +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq);  	if (r)  		return r; @@ -447,6 +448,8 @@ static int vce_v3_0_sw_init(void *handle)  			return r;  	} +	r = amdgpu_vce_entity_init(adev); 
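The SDMA and VCE ring changes above and below all follow the same pattern: the per-instance index is cached in ring->me when the ring funcs are installed, so the rptr/wptr and HDP-flush helpers can index per-instance registers directly instead of comparing ring pointers. A minimal illustrative sketch of that pattern, using names from the sdma_v2_4 hunks (not part of the patch itself):

	static void example_set_ring_funcs(struct amdgpu_device *adev)
	{
		int i;

		for (i = 0; i < adev->sdma.num_instances; i++) {
			adev->sdma.instance[i].ring.funcs = &sdma_v2_4_ring_funcs;
			adev->sdma.instance[i].ring.me = i;	/* cache instance index */
		}
	}

	static uint64_t example_ring_get_wptr(struct amdgpu_ring *ring)
	{
		/* per-instance register selected via ring->me, no pointer compare */
		return RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me]) >> 2;
	}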
+  	return r;  } @@ -942,12 +945,16 @@ static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)  	int i;  	if (adev->asic_type >= CHIP_STONEY) { -		for (i = 0; i < adev->vce.num_rings; i++) +		for (i = 0; i < adev->vce.num_rings; i++) {  			adev->vce.ring[i].funcs = &vce_v3_0_ring_vm_funcs; +			adev->vce.ring[i].me = i; +		}  		DRM_INFO("VCE enabled in VM mode\n");  	} else { -		for (i = 0; i < adev->vce.num_rings; i++) +		for (i = 0; i < adev->vce.num_rings; i++) {  			adev->vce.ring[i].funcs = &vce_v3_0_ring_phys_funcs; +			adev->vce.ring[i].me = i; +		}  		DRM_INFO("VCE enabled in physical mode\n");  	}  } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 8fd1b742985a..2e4d1b5f6243 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -39,6 +39,8 @@  #include "mmhub/mmhub_1_0_offset.h"  #include "mmhub/mmhub_1_0_sh_mask.h" +#include "ivsrcid/vce/irqsrcs_vce_4_0.h" +  #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02  #define VCE_V4_0_FW_SIZE	(384 * 1024) @@ -60,9 +62,9 @@ static uint64_t vce_v4_0_ring_get_rptr(struct amdgpu_ring *ring)  {  	struct amdgpu_device *adev = ring->adev; -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR)); -	else if (ring == &adev->vce.ring[1]) +	else if (ring->me == 1)  		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR2));  	else  		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR3)); @@ -82,9 +84,9 @@ static uint64_t vce_v4_0_ring_get_wptr(struct amdgpu_ring *ring)  	if (ring->use_doorbell)  		return adev->wb.wb[ring->wptr_offs]; -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR)); -	else if (ring == &adev->vce.ring[1]) +	else if (ring->me == 1)  		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2));  	else  		return RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR3)); @@ -108,10 +110,10 @@ static void vce_v4_0_ring_set_wptr(struct amdgpu_ring *ring)  		return;  	} -	if (ring == &adev->vce.ring[0]) +	if (ring->me == 0)  		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR),  			lower_32_bits(ring->wptr)); -	else if (ring == &adev->vce.ring[1]) +	else if (ring->me == 1)  		WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR2),  			lower_32_bits(ring->wptr));  	else @@ -417,6 +419,7 @@ static int vce_v4_0_sw_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	struct amdgpu_ring *ring; +  	unsigned size;  	int r, i; @@ -436,7 +439,7 @@ static int vce_v4_0_sw_init(void *handle)  		const struct common_firmware_header *hdr;  		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); -		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL); +		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);  		if (!adev->vce.saved_bo)  			return -ENOMEM; @@ -472,6 +475,11 @@ static int vce_v4_0_sw_init(void *handle)  			return r;  	} + +	r = amdgpu_vce_entity_init(adev); +	if (r) +		return r; +  	r = amdgpu_virt_alloc_mm_table(adev);  	if (r)  		return r; @@ -488,7 +496,7 @@ static int vce_v4_0_sw_fini(void *handle)  	amdgpu_virt_free_mm_table(adev);  	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { -		kfree(adev->vce.saved_bo); +		kvfree(adev->vce.saved_bo);  		adev->vce.saved_bo = NULL;  	} @@ -1088,8 +1096,10 @@ static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)  {  	int i; -	for (i = 0; i < adev->vce.num_rings; i++) +	for (i = 0; i < adev->vce.num_rings; i++) {  		adev->vce.ring[i].funcs = &vce_v4_0_ring_vm_funcs; +		
adev->vce.ring[i].me = i; +	}  	DRM_INFO("VCE enabled in VM mode\n");  } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 29684c3ea4ef..072371ef5975 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -35,10 +35,14 @@  #include "mmhub/mmhub_9_1_offset.h"  #include "mmhub/mmhub_9_1_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" +  static int vcn_v1_0_stop(struct amdgpu_device *adev);  static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);  static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);  static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);  /**   * vcn_v1_0_early_init - set function pointers @@ -55,6 +59,7 @@ static int vcn_v1_0_early_init(void *handle)  	vcn_v1_0_set_dec_ring_funcs(adev);  	vcn_v1_0_set_enc_ring_funcs(adev); +	vcn_v1_0_set_jpeg_ring_funcs(adev);  	vcn_v1_0_set_irq_funcs(adev);  	return 0; @@ -74,22 +79,37 @@ static int vcn_v1_0_sw_init(void *handle)  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	/* VCN DEC TRAP */ -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 124, &adev->vcn.irq); +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq);  	if (r)  		return r;  	/* VCN ENC TRAP */  	for (i = 0; i < adev->vcn.num_enc_rings; ++i) { -		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + 119, +		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,  					&adev->vcn.irq);  		if (r)  			return r;  	} +	/* VCN JPEG TRAP */ +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); +	if (r) +		return r; +  	r = amdgpu_vcn_sw_init(adev);  	if (r)  		return r; +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +		const struct common_firmware_header *hdr; +		hdr = (const struct common_firmware_header *)adev->vcn.fw->data; +		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; +		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; +		adev->firmware.fw_size += +			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); +		DRM_INFO("PSP loading VCN firmware\n"); +	} +  	r = amdgpu_vcn_resume(adev);  	if (r)  		return r; @@ -108,6 +128,12 @@ static int vcn_v1_0_sw_init(void *handle)  			return r;  	} +	ring = &adev->vcn.ring_jpeg; +	sprintf(ring->name, "vcn_jpeg"); +	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); +	if (r) +		return r; +  	return r;  } @@ -162,6 +188,14 @@ static int vcn_v1_0_hw_init(void *handle)  		}  	} +	ring = &adev->vcn.ring_jpeg; +	ring->ready = true; +	r = amdgpu_ring_test_ring(ring); +	if (r) { +		ring->ready = false; +		goto done; +	} +  done:  	if (!r)  		DRM_INFO("VCN decode and encode initialized successfully.\n"); @@ -241,26 +275,38 @@ static int vcn_v1_0_resume(void *handle)  static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)  {  	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); - -	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +	uint32_t offset; + +	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, +			     (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +			     
(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi)); +		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0); +		offset = 0; +	} else { +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,  			lower_32_bits(adev->vcn.gpu_addr)); -	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, +		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,  			upper_32_bits(adev->vcn.gpu_addr)); -	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, -				AMDGPU_UVD_FIRMWARE_OFFSET >> 3); +		offset = size; +		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, +			     AMDGPU_UVD_FIRMWARE_OFFSET >> 3); +	} +  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, -			lower_32_bits(adev->vcn.gpu_addr + size)); +		     lower_32_bits(adev->vcn.gpu_addr + offset));  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, -			upper_32_bits(adev->vcn.gpu_addr + size)); +		     upper_32_bits(adev->vcn.gpu_addr + offset));  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE);  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, -			lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); +		     lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE));  	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, -			upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE)); +		     upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE));  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);  	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,  			AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40)); @@ -578,12 +624,12 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)  	/* disable byte swapping */  	lmi_swap_cntl = 0; -	vcn_v1_0_mc_resume(adev); -  	vcn_1_0_disable_static_power_gating(adev);  	/* disable clock gating */  	vcn_v1_0_disable_clock_gating(adev); +	vcn_v1_0_mc_resume(adev); +  	/* disable interupt */  	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,  			~UVD_MASTINT_EN__VCPU_EN_MASK); @@ -729,6 +775,22 @@ static int vcn_v1_0_start(struct amdgpu_device *adev)  	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));  	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); +	ring = &adev->vcn.ring_jpeg; +	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); +	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); +	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); +	WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); +	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); +	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); +	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + +	/* initialize wptr */ +	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + +	/* copy patch commands to the jpeg ring */ +	vcn_v1_0_jpeg_ring_set_patch_ring(ring, +		(ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); +  	return 0;  } @@ -1126,6 +1188,383 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,  	amdgpu_ring_write(ring, val);  } + +/** + * vcn_v1_0_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v1_0_jpeg_ring_get_wptr - get write pointer + * + * 
@ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v1_0_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); +} + +/** + * vcn_v1_0_jpeg_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x68e04); + +	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x80010000); +} + +/** + * vcn_v1_0_jpeg_ring_insert_end - insert an end command + * + * @ring: amdgpu_ring pointer + * + * Write an end command to the ring. + */ +static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x68e04); + +	amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x00010000); +} + +/** + * vcn_v1_0_jpeg_ring_emit_fence - emit a fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring.
+ */ +static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +				     unsigned flags) +{ +	struct amdgpu_device *adev = ring->adev; + +	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, seq); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, seq); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, lower_32_bits(addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, upper_32_bits(addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x8); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); +	amdgpu_ring_write(ring, 0); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x01400200); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, seq); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, lower_32_bits(addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, upper_32_bits(addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); +	amdgpu_ring_write(ring, 0xffffffff); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x3fbc); + +	amdgpu_ring_write(ring, +		PACKETJ(0, 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x1); +} + +/** + * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. 
+ */ +static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, +				  struct amdgpu_ib *ib, +				  unsigned vmid, bool ctx_switch) +{ +	struct amdgpu_device *adev = ring->adev; + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, (vmid | (vmid << 4))); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, (vmid | (vmid << 4))); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, ib->length_dw); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + +	amdgpu_ring_write(ring, +		PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); +	amdgpu_ring_write(ring, 0); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x01400200); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x2); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); +	amdgpu_ring_write(ring, 0x2); +} + +static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, +					    uint32_t reg, uint32_t val, +					    uint32_t mask) +{ +	struct amdgpu_device *adev = ring->adev; +	uint32_t reg_offset = (reg << 2); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, 0x01400200); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); +	amdgpu_ring_write(ring, val); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); +	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || +		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { +		amdgpu_ring_write(ring, 0); +		amdgpu_ring_write(ring, +			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); +	} else { +		amdgpu_ring_write(ring, reg_offset); +		amdgpu_ring_write(ring, +			PACKETJ(0, 0, 0, PACKETJ_TYPE3)); +	} +	amdgpu_ring_write(ring, mask); +} + +static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, +		unsigned vmid, uint64_t pd_addr) +{ +	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +	uint32_t data0, data1, mask; + +	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + +	/* wait for register write */ +	data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; +	data1 = lower_32_bits(pd_addr); +	mask = 0xffffffff; +	vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, +					uint32_t 
reg, uint32_t val) +{ +	struct amdgpu_device *adev = ring->adev; +	uint32_t reg_offset = (reg << 2); + +	amdgpu_ring_write(ring, +		PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); +	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || +			((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { +		amdgpu_ring_write(ring, 0); +		amdgpu_ring_write(ring, +			PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); +	} else { +		amdgpu_ring_write(ring, reg_offset); +		amdgpu_ring_write(ring, +			PACKETJ(0, 0, 0, PACKETJ_TYPE0)); +	} +	amdgpu_ring_write(ring, val); +} + +static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ +	int i; + +	WARN_ON(ring->wptr % 2 || count % 2); + +	for (i = 0; i < count / 2; i++) { +		amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); +		amdgpu_ring_write(ring, 0); +	} +} + +static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) +{ +	struct amdgpu_device *adev = ring->adev; +	ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); +	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || +		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { +		ring->ring[(*ptr)++] = 0; +		ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); +	} else { +		ring->ring[(*ptr)++] = reg_offset; +		ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); +	} +	ring->ring[(*ptr)++] = val; +} + +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) +{ +	struct amdgpu_device *adev = ring->adev; + +	uint32_t reg, reg_offset, val, mask, i; + +	// 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW +	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); +	reg_offset = (reg << 2); +	val = lower_32_bits(ring->gpu_addr); +	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + +	// 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH +	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); +	reg_offset = (reg << 2); +	val = upper_32_bits(ring->gpu_addr); +	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + +	// 3rd to 5th: issue MEM_READ commands +	for (i = 0; i <= 2; i++) { +		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); +		ring->ring[ptr++] = 0; +	} + +	// 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability +	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); +	reg_offset = (reg << 2); +	val = 0x13; +	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + +	// 7th: program mmUVD_JRBC_RB_REF_DATA +	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA); +	reg_offset = (reg << 2); +	val = 0x1; +	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + +	// 8th: issue conditional register read mmUVD_JRBC_RB_CNTL +	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); +	reg_offset = (reg << 2); +	val = 0x1; +	mask = 0x1; + +	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); +	ring->ring[ptr++] = 0x01400200; +	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); +	ring->ring[ptr++] = val; +	ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); +	if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || +		((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { +		ring->ring[ptr++] = 0; +		
ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); +	} else { +		ring->ring[ptr++] = reg_offset; +		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); +	} +	ring->ring[ptr++] = mask; + +	//9th to 21st: insert no-op +	for (i = 0; i <= 12; i++) { +		ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); +		ring->ring[ptr++] = 0; +	} + +	//22nd: reset mmUVD_JRBC_RB_RPTR +	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR); +	reg_offset = (reg << 2); +	val = 0; +	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + +	//23rd: program mmUVD_JRBC_RB_CNTL to disable no_fetch +	reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); +	reg_offset = (reg << 2); +	val = 0x12; +	vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); +} +  static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,  					struct amdgpu_irq_src *source,  					unsigned type, @@ -1150,6 +1589,9 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,  	case 120:  		amdgpu_fence_process(&adev->vcn.ring_enc[1]);  		break; +	case 126: +		amdgpu_fence_process(&adev->vcn.ring_jpeg); +		break;  	default:  		DRM_ERROR("Unhandled interrupt: %d %d\n",  			  entry->src_id, entry->src_data[0]); @@ -1273,6 +1715,39 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {  	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,  }; +static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { +	.type = AMDGPU_RING_TYPE_VCN_JPEG, +	.align_mask = 0xf, +	.nop = PACKET0(0x81ff, 0), +	.support_64bit_ptrs = false, +	.vmhub = AMDGPU_MMHUB, +	.extra_dw = 64, +	.get_rptr = vcn_v1_0_jpeg_ring_get_rptr, +	.get_wptr = vcn_v1_0_jpeg_ring_get_wptr, +	.set_wptr = vcn_v1_0_jpeg_ring_set_wptr, +	.emit_frame_size = +		6 + 6 + /* hdp invalidate / flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + +		8 + /* vcn_v1_0_dec_ring_emit_vm_flush */ +		14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */ +		6, +	.emit_ib_size = 22, /* vcn_v1_0_dec_ring_emit_ib */ +	.emit_ib = vcn_v1_0_jpeg_ring_emit_ib, +	.emit_fence = vcn_v1_0_jpeg_ring_emit_fence, +	.emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush, +	.test_ring = amdgpu_vcn_jpeg_ring_test_ring, +	.test_ib = amdgpu_vcn_jpeg_ring_test_ib, +	.insert_nop = vcn_v1_0_jpeg_ring_nop, +	.insert_start = vcn_v1_0_jpeg_ring_insert_start, +	.insert_end = vcn_v1_0_jpeg_ring_insert_end, +	.pad_ib = amdgpu_ring_generic_pad_ib, +	.begin_use = amdgpu_vcn_ring_begin_use, +	.end_use = amdgpu_vcn_ring_end_use, +	.emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg, +	.emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait, +}; +  static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)  {  	adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; @@ -1289,6 +1764,12 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)  	DRM_INFO("VCN encode is enabled in VM mode\n");  } +static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ +	adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; +	DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); +} +  static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {  	.set = vcn_v1_0_set_interrupt_state,  	.process = vcn_v1_0_process_interrupt, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 45aafca7f315..c5c9b2bc190d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -51,6 +51,7 @@ int vega10_reg_base_init(struct 
amdgpu_device *adev)  		adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i]));  		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i]));  		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); +		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));  	}  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 4ac1288ab7df..42c8ad105b05 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1363,11 +1363,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,  	if (adev->cg_flags & (AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_MC_MGCG)) {  		if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) { -			pp_support_state = AMD_CG_SUPPORT_MC_LS; +			pp_support_state = PP_STATE_SUPPORT_LS;  			pp_state = PP_STATE_LS;  		}  		if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG) { -			pp_support_state |= AMD_CG_SUPPORT_MC_MGCG; +			pp_support_state |= PP_STATE_SUPPORT_CG;  			pp_state |= PP_STATE_CG;  		}  		if (state == AMD_CG_STATE_UNGATE) @@ -1382,11 +1382,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,  	if (adev->cg_flags & (AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_SDMA_MGCG)) {  		if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS) { -			pp_support_state = AMD_CG_SUPPORT_SDMA_LS; +			pp_support_state = PP_STATE_SUPPORT_LS;  			pp_state = PP_STATE_LS;  		}  		if (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG) { -			pp_support_state |= AMD_CG_SUPPORT_SDMA_MGCG; +			pp_support_state |= PP_STATE_SUPPORT_CG;  			pp_state |= PP_STATE_CG;  		}  		if (state == AMD_CG_STATE_UNGATE) @@ -1401,11 +1401,11 @@ static int vi_common_set_clockgating_state_by_smu(void *handle,  	if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_MGCG)) {  		if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { -			pp_support_state = AMD_CG_SUPPORT_HDP_LS; +			pp_support_state = PP_STATE_SUPPORT_LS;  			pp_state = PP_STATE_LS;  		}  		if (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG) { -			pp_support_state |= AMD_CG_SUPPORT_HDP_MGCG; +			pp_support_state |= PP_STATE_SUPPORT_CG;  			pp_state |= PP_STATE_CG;  		}  		if (state == AMD_CG_STATE_UNGATE)  |