diff options
| author | Dmitry Torokhov <[email protected]> | 2024-07-15 14:03:44 -0700 | 
|---|---|---|
| committer | Dmitry Torokhov <[email protected]> | 2024-07-15 14:03:44 -0700 | 
| commit | a23e1966932464e1c5226cb9ac4ce1d5fc10ba22 (patch) | |
| tree | bf5f1b57faa01ca31656bfc48c7d6b6f0bc39189 /drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | |
| parent | 7c7b1be19b228b450c2945ec379d7fc6bfef9852 (diff) | |
| parent | f3efefb6fdcce604413135bd8d4c5568e53a1f13 (diff) | |
Merge branch 'next' into for-linus
Prepare input updates for 6.11 merge window.
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 118 | 
1 file changed, 106 insertions, 12 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index eec41ad30406..147100c27c2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,24 +21,19 @@   *   */ +#include <linux/devcoredump.h> +#include <generated/utsrelease.h> +  #include "amdgpu_reset.h"  #include "aldebaran.h"  #include "sienna_cichlid.h"  #include "smu_v13_0_10.h" -int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl, -			     struct amdgpu_reset_handler *handler) -{ -	/* TODO: Check if handler exists? */ -	list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers); -	return 0; -} -  int amdgpu_reset_init(struct amdgpu_device *adev)  {  	int ret = 0; -	switch (adev->ip_versions[MP1_HWIP][0]) { +	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {  	case IP_VERSION(13, 0, 2):  	case IP_VERSION(13, 0, 6):  		ret = aldebaran_reset_init(adev); @@ -60,7 +55,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)  {  	int ret = 0; -	switch (adev->ip_versions[MP1_HWIP][0]) { +	switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {  	case IP_VERSION(13, 0, 2):  	case IP_VERSION(13, 0, 6):  		ret = aldebaran_reset_fini(adev); @@ -87,7 +82,7 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev,  		reset_handler = adev->reset_cntl->get_reset_handler(  			adev->reset_cntl, reset_context);  	if (!reset_handler) -		return -ENOSYS; +		return -EOPNOTSUPP;  	return reset_handler->prepare_hwcontext(adev->reset_cntl,  						reset_context); @@ -103,7 +98,7 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,  		reset_handler = adev->reset_cntl->get_reset_handler(  			adev->reset_cntl, reset_context);  	if (!reset_handler) -		return -ENOSYS; +		return -EOPNOTSUPP;  	ret = reset_handler->perform_reset(adev->reset_cntl, reset_context);  	if (ret) @@ -167,5 +162,104 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)  	up_write(&reset_domain->sem);  } 
+#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, +		     struct amdgpu_reset_context *reset_context) +{ +} +#else +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, +			void *data, size_t datalen) +{ +	struct drm_printer p; +	struct amdgpu_coredump_info *coredump = data; +	struct drm_print_iterator iter; +	int i; + +	iter.data = buffer; +	iter.offset = 0; +	iter.start = offset; +	iter.remain = count; + +	p = drm_coredump_printer(&iter); + +	drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); +	drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); +	drm_printf(&p, "kernel: " UTS_RELEASE "\n"); +	drm_printf(&p, "module: " KBUILD_MODNAME "\n"); +	drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, +			coredump->reset_time.tv_nsec); + +	if (coredump->reset_task_info.pid) +		drm_printf(&p, "process_name: %s PID: %d\n", +			   coredump->reset_task_info.process_name, +			   coredump->reset_task_info.pid); + +	if (coredump->ring) { +		drm_printf(&p, "\nRing timed out details\n"); +		drm_printf(&p, "IP Type: %d Ring Name: %s\n", +			   coredump->ring->funcs->type, +			   coredump->ring->name); +	} + +	if (coredump->reset_vram_lost) +		drm_printf(&p, "VRAM is lost due to GPU reset!\n"); +	if (coredump->adev->reset_info.num_regs) { +		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n"); + +		for (i = 0; i < coredump->adev->reset_info.num_regs; i++) +			drm_printf(&p, "0x%08x: 0x%08x\n", +				   coredump->adev->reset_info.reset_dump_reg_list[i], +				   coredump->adev->reset_info.reset_dump_reg_value[i]); +	} + +	return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ +	kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, +		     struct amdgpu_reset_context *reset_context) +{ +	struct amdgpu_coredump_info *coredump; +	struct drm_device *dev = adev_to_drm(adev); +	struct amdgpu_job *job = reset_context->job; +	
struct drm_sched_job *s_job; + +	coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + +	if (!coredump) { +		DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); +		return; +	} +	coredump->reset_vram_lost = vram_lost; +	if (reset_context->job && reset_context->job->vm) { +		struct amdgpu_task_info *ti; +		struct amdgpu_vm *vm = reset_context->job->vm; + +		ti = amdgpu_vm_get_task_info_vm(vm); +		if (ti) { +			coredump->reset_task_info = *ti; +			amdgpu_vm_put_task_info(ti); +		} +	} + +	if (job) { +		s_job = &job->base; +		coredump->ring = to_amdgpu_ring(s_job->sched); +	} + +	coredump->adev = adev; + +	ktime_get_ts64(&coredump->reset_time); + +	dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, +		      amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif |