aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c169
1 files changed, 116 insertions, 53 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 33991b4a5627..0d7017a0f2b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -23,6 +23,7 @@
*/
#include <drm/amdgpu_drm.h>
+#include <drm/drm_aperture.h>
#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
#include <drm/drm_vblank.h>
@@ -36,16 +37,18 @@
#include <linux/vga_switcheroo.h>
#include <drm/drm_probe_helper.h>
#include <linux/mmu_notifier.h>
+#include <linux/suspend.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_sched.h"
-
+#include "amdgpu_fdinfo.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_reset.h"
/*
* KMS wrapper.
@@ -183,7 +186,7 @@ uint amdgpu_ras_mask = 0xffffffff;
int amdgpu_bad_page_threshold = -1;
struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
.timeout_fatal_disable = false,
- .period = 0x23, /* default to max. timeout = 1 << 0x23 cycles */
+ .period = 0x0, /* default to 0x0 (timeout disable) */
};
/**
@@ -285,9 +288,9 @@ module_param_named(msi, amdgpu_msi, int, 0444);
* for SDMA and Video.
*
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. And there is no timeout enforced on compute jobs.
+ * jobs is 10000. The timeout for compute is 60000.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
"for passthrough or sriov, 10000 for all jobs."
" 0: keep default value. negative: infinity timeout), "
"format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
@@ -515,7 +518,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
* DOC: gpu_recovery (int)
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");
module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**
@@ -551,7 +554,7 @@ module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_di
* DOC: timeout_period (uint)
* Modify the watchdog timeout max_cycles as (1 << period)
*/
-MODULE_PARM_DESC(timeout_period, "watchdog timeout period (1 to 0x23(default), timeout maxCycles = (1 << period)");
+MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0 = timeout disabled, 1 ~ 0x23 = timeout maxcycles = (1 << period)");
module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
/**
@@ -638,7 +641,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
/**
* DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
* (0 = retry enabled, 1 = retry disabled, -1 auto (default))
*/
MODULE_PARM_DESC(noretry,
@@ -1161,6 +1165,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+ {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
/* Van Gogh */
@@ -1182,6 +1187,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
@@ -1255,7 +1261,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
#endif
/* Get rid of things like offb */
- ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb");
+ ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, "amdgpudrmfb");
if (ret)
return ret;
@@ -1307,14 +1313,16 @@ amdgpu_pci_remove(struct pci_dev *pdev)
{
struct drm_device *dev = pci_get_drvdata(pdev);
-#ifdef MODULE
- if (THIS_MODULE->state != MODULE_STATE_GOING)
-#endif
- DRM_ERROR("Hotplug removal is not supported\n");
drm_dev_unplug(dev);
amdgpu_driver_unload_kms(dev);
+
+ /*
+ * Flush any in flight DMA operations from device.
+ * Clear the Bus Master Enable bit and then wait on the PCIe Device
+ * StatusTransactions Pending bit.
+ */
pci_disable_device(pdev);
- pci_set_drvdata(pdev, NULL);
+ pci_wait_for_pending_transaction(pdev);
}
static void
@@ -1333,9 +1341,7 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
*/
if (!amdgpu_passthrough(adev))
adev->mp1_state = PP_MP1_STATE_UNLOAD;
- adev->in_poweroff_reboot_com = true;
amdgpu_device_ip_suspend(adev);
- adev->in_poweroff_reboot_com = false;
adev->mp1_state = PP_MP1_STATE_NONE;
}
@@ -1349,7 +1355,9 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
struct list_head device_list;
struct amdgpu_device *adev;
int i, r;
- bool need_full_reset = true;
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
mutex_lock(&mgpu_info.mutex);
if (mgpu_info.pending_reset == true) {
@@ -1359,9 +1367,14 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
mgpu_info.pending_reset = true;
mutex_unlock(&mgpu_info.mutex);
+ /* Use a common context, just need to make sure full reset is done */
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
for (i = 0; i < mgpu_info.num_dgpu; i++) {
adev = mgpu_info.gpu_ins[i].adev;
- r = amdgpu_device_pre_asic_reset(adev, NULL, &need_full_reset);
+ reset_context.reset_req_dev = adev;
+ r = amdgpu_device_pre_asic_reset(adev, &reset_context);
if (r) {
dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
r, adev_to_drm(adev)->unique);
@@ -1388,7 +1401,10 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
list_for_each_entry(adev, &device_list, reset_list)
amdgpu_unregister_gpu_instance(adev);
- r = amdgpu_do_asic_reset(NULL, &device_list, &need_full_reset, true);
+ /* Use a common context, just need to make sure full reset is done */
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ r = amdgpu_do_asic_reset(&device_list, &reset_context);
+
if (r) {
DRM_ERROR("reinit gpus failure");
return;
@@ -1402,18 +1418,50 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
return;
}
+static int amdgpu_pmops_prepare(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+ /* Return a positive number here so
+ * DPM_FLAG_SMART_SUSPEND works properly
+ */
+ if (amdgpu_device_supports_boco(drm_dev))
+ return pm_runtime_suspended(dev) &&
+ pm_suspend_via_firmware();
+
+ return 0;
+}
+
+static void amdgpu_pmops_complete(struct device *dev)
+{
+ /* nothing to do */
+}
+
static int amdgpu_pmops_suspend(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
- return amdgpu_device_suspend(drm_dev, true);
+ if (amdgpu_acpi_is_s0ix_supported(adev))
+ adev->in_s0ix = true;
+ adev->in_s3 = true;
+ r = amdgpu_device_suspend(drm_dev, true);
+ adev->in_s3 = false;
+
+ return r;
}
static int amdgpu_pmops_resume(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ int r;
- return amdgpu_device_resume(drm_dev, true);
+ r = amdgpu_device_resume(drm_dev, true);
+ if (amdgpu_acpi_is_s0ix_supported(adev))
+ adev->in_s0ix = false;
+ return r;
}
static int amdgpu_pmops_freeze(struct device *dev)
@@ -1422,9 +1470,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int r;
- adev->in_hibernate = true;
+ adev->in_s4 = true;
r = amdgpu_device_suspend(drm_dev, true);
- adev->in_hibernate = false;
+ adev->in_s4 = false;
if (r)
return r;
return amdgpu_asic_reset(adev);
@@ -1440,13 +1488,8 @@ static int amdgpu_pmops_thaw(struct device *dev)
static int amdgpu_pmops_poweroff(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
- struct amdgpu_device *adev = drm_to_adev(drm_dev);
- int r;
- adev->in_poweroff_reboot_com = true;
- r = amdgpu_device_suspend(drm_dev, true);
- adev->in_poweroff_reboot_com = false;
- return r;
+ return amdgpu_device_suspend(drm_dev, true);
}
static int amdgpu_pmops_restore(struct device *dev)
@@ -1479,7 +1522,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
}
adev->in_runpm = true;
- if (amdgpu_device_supports_atpx(drm_dev))
+ if (amdgpu_device_supports_px(drm_dev))
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
ret = amdgpu_device_suspend(drm_dev, false);
@@ -1488,16 +1531,14 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
return ret;
}
- if (amdgpu_device_supports_atpx(drm_dev)) {
+ if (amdgpu_device_supports_px(drm_dev)) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
- if (!amdgpu_is_atpx_hybrid()) {
- amdgpu_device_cache_pci_state(pdev);
- pci_disable_device(pdev);
- pci_ignore_hotplug(pdev);
- pci_set_power_state(pdev, PCI_D3cold);
- }
+ amdgpu_device_cache_pci_state(pdev);
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
} else if (amdgpu_device_supports_baco(drm_dev)) {
amdgpu_device_baco_enter(drm_dev);
@@ -1516,19 +1557,21 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
if (!adev->runpm)
return -EINVAL;
- if (amdgpu_device_supports_atpx(drm_dev)) {
+ /* Avoids registers access if device is physically gone */
+ if (!pci_device_is_present(adev->pdev))
+ adev->no_hw_access = true;
+
+ if (amdgpu_device_supports_px(drm_dev)) {
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
- if (!amdgpu_is_atpx_hybrid()) {
- pci_set_power_state(pdev, PCI_D0);
- amdgpu_device_load_pci_state(pdev);
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
- }
+ pci_set_power_state(pdev, PCI_D0);
+ amdgpu_device_load_pci_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
pci_set_master(pdev);
} else if (amdgpu_device_supports_boco(drm_dev)) {
/* Only need to handle PCI state in the driver for ATPX
@@ -1539,7 +1582,10 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
amdgpu_device_baco_exit(drm_dev);
}
ret = amdgpu_device_resume(drm_dev, false);
- if (amdgpu_device_supports_atpx(drm_dev))
+ if (ret)
+ return ret;
+
+ if (amdgpu_device_supports_px(drm_dev))
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
adev->in_runpm = false;
return 0;
@@ -1560,17 +1606,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
if (amdgpu_device_has_dc_support(adev)) {
struct drm_crtc *crtc;
- drm_modeset_lock_all(drm_dev);
-
drm_for_each_crtc(crtc, drm_dev) {
- if (crtc->state->active) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
break;
- }
}
- drm_modeset_unlock_all(drm_dev);
-
} else {
struct drm_connector *list_connector;
struct drm_connector_list_iter iter;
@@ -1620,6 +1664,8 @@ out:
}
static const struct dev_pm_ops amdgpu_pm_ops = {
+ .prepare = amdgpu_pmops_prepare,
+ .complete = amdgpu_pmops_complete,
.suspend = amdgpu_pmops_suspend,
.resume = amdgpu_pmops_resume,
.freeze = amdgpu_pmops_freeze,
@@ -1649,12 +1695,15 @@ static const struct file_operations amdgpu_driver_kms_fops = {
.flush = amdgpu_flush,
.release = drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
- .mmap = amdgpu_mmap,
+ .mmap = drm_gem_mmap,
.poll = drm_poll,
.read = drm_read,
#ifdef CONFIG_COMPAT
.compat_ioctl = amdgpu_kms_compat_ioctl,
#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = amdgpu_show_fdinfo
+#endif
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
@@ -1708,11 +1757,12 @@ static const struct drm_driver amdgpu_kms_driver = {
.dumb_create = amdgpu_mode_dumb_create,
.dumb_map_offset = amdgpu_mode_dumb_mmap,
.fops = &amdgpu_driver_kms_fops,
+ .release = &amdgpu_driver_release_kms,
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = amdgpu_gem_prime_import,
- .gem_prime_mmap = amdgpu_gem_prime_mmap,
+ .gem_prime_mmap = drm_gem_prime_mmap,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
@@ -1729,6 +1779,18 @@ static struct pci_error_handlers amdgpu_pci_err_handler = {
.resume = amdgpu_pci_resume,
};
+extern const struct attribute_group amdgpu_vram_mgr_attr_group;
+extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
+extern const struct attribute_group amdgpu_vbios_version_attr_group;
+
+static const struct attribute_group *amdgpu_sysfs_groups[] = {
+ &amdgpu_vram_mgr_attr_group,
+ &amdgpu_gtt_mgr_attr_group,
+ &amdgpu_vbios_version_attr_group,
+ NULL,
+};
+
+
static struct pci_driver amdgpu_kms_pci_driver = {
.name = DRIVER_NAME,
.id_table = pciidlist,
@@ -1737,6 +1799,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
.shutdown = amdgpu_pci_shutdown,
.driver.pm = &amdgpu_pm_ops,
.err_handler = &amdgpu_pci_err_handler,
+ .dev_groups = amdgpu_sysfs_groups,
};
static int __init amdgpu_init(void)