aboutsummaryrefslogtreecommitdiff
path: root/drivers/pci/pcie/err.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-07 12:45:58 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-07 12:45:58 -0700
commit3a3869f1c443383ef8354ffa0e5fb8df65d8b549 (patch)
tree7972a4444f04a91f4baab5896df33d4e55d9909d /drivers/pci/pcie/err.c
parent3036bc45364f98515a2c446d7fac2c34dcfbeff4 (diff)
parent488ad6d3678beee65bcd74e6a9764bd7cee9d3d3 (diff)
Merge tag 'pci-v4.18-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
Pull PCI updates from Bjorn Helgaas: - unify AER decoding for native and ACPI CPER sources (Alexandru Gagniuc) - add TLP header info to AER tracepoint (Thomas Tai) - add generic pcie_wait_for_link() interface (Oza Pawandeep) - handle AER ERR_FATAL by removing and re-enumerating devices, as Downstream Port Containment does (Oza Pawandeep) - factor out common code between AER and DPC recovery (Oza Pawandeep) - stop triggering DPC for ERR_NONFATAL errors (Oza Pawandeep) - share ERR_FATAL recovery path between AER and DPC (Oza Pawandeep) - disable ASPM L1.2 substate if we don't have LTR (Bjorn Helgaas) - respect platform ownership of LTR (Bjorn Helgaas) - clear interrupt status in top half to avoid interrupt storm (Oza Pawandeep) - neaten pci=earlydump output (Andy Shevchenko) - avoid errors when extended config space inaccessible (Gilles Buloz) - prevent sysfs disable of device while driver attached (Christoph Hellwig) - use core interface to report PCIe link properties in bnx2x, bnxt_en, cxgb4, ixgbe (Bjorn Helgaas) - remove unused pcie_get_minimum_link() (Bjorn Helgaas) - fix use-before-set error in ibmphp (Dan Carpenter) - fix pciehp timeouts caused by Command Completed errata (Bjorn Helgaas) - fix refcounting in pnv_php hotplug (Julia Lawall) - clear pciehp Presence Detect and Data Link Layer Status Changed on resume so we don't miss hotplug events (Mika Westerberg) - only request pciehp control if we support it, so platform can use ACPI hotplug otherwise (Mika Westerberg) - convert SHPC to be builtin only (Mika Westerberg) - request SHPC control via _OSC if we support it (Mika Westerberg) - simplify SHPC handoff from firmware (Mika Westerberg) - fix an SHPC quirk that mistakenly included *all* AMD bridges as well as devices from any vendor with device ID 0x7458 (Bjorn Helgaas) - assign a bus number even to non-native hotplug bridges to leave space for acpiphp additions, to fix a common Thunderbolt xHCI hot-add failure (Mika Westerberg) - keep acpiphp from scanning native hotplug bridges, to fix common Thunderbolt hot-add failures (Mika Westerberg) - improve "partially hidden behind bridge" messages from core (Mika Westerberg) - add macros for PCIe Link Control 2 register (Frederick Lawler) - replace IB/hfi1 custom macros with PCI core versions (Frederick Lawler) - remove dead microblaze and xtensa code (Bjorn Helgaas) - use dev_printk() when possible in xtensa and mips (Bjorn Helgaas) - remove unused pcie_port_acpi_setup() and portdrv_acpi.c (Bjorn Helgaas) - add managed interface to get PCI host bridge resources from OF (Jan Kiszka) - add support for unbinding generic PCI host controller (Jan Kiszka) - fix memory leaks when unbinding generic PCI host controller (Jan Kiszka) - request legacy VGA framebuffer only for VGA devices to avoid false device conflicts (Bjorn Helgaas) - turn on PCI_COMMAND_IO & PCI_COMMAND_MEMORY in pci_enable_device() like everybody else, not in pcibios_fixup_bus() (Bjorn Helgaas) - add generic enable function for simple SR-IOV hardware (Alexander Duyck) - use generic SR-IOV enable for ena, nvme (Alexander Duyck) - add ACS quirk for Intel 7th & 8th Gen mobile (Alex Williamson) - add ACS quirk for Intel 300 series (Mika Westerberg) - enable register clock for Armada 7K/8K (Gregory CLEMENT) - reduce Keystone "link already up" log level (Fabio Estevam) - move private DT functions to drivers/pci/ (Rob Herring) - factor out dwc CONFIG_PCI Kconfig dependencies (Rob Herring) - add DesignWare support to the endpoint test driver (Gustavo Pimentel) - add DesignWare support for endpoint mode (Gustavo Pimentel) - use devm_ioremap_resource() instead of devm_ioremap() in dra7xx and artpec6 (Gustavo Pimentel) - fix Qualcomm bitwise NOT issue (Dan Carpenter) - add Qualcomm runtime PM support (Srinivas Kandagatla) - fix DesignWare enumeration below bridges (Koen Vandeputte) - use usleep() instead of mdelay() in endpoint test (Jia-Ju Bai) - add configfs entries for pci_epf_driver device IDs (Kishon Vijay Abraham I) - clean up pci_endpoint_test driver (Gustavo Pimentel) - update Layerscape maintainer email addresses (Minghuan Lian) - add COMPILE_TEST to improve build test coverage (Rob Herring) - fix Hyper-V bus registration failure caused by domain/serial number confusion (Sridhar Pitchai) - improve Hyper-V refcounting and coding style (Stephen Hemminger) - avoid potential Hyper-V hang waiting for a response that will never come (Dexuan Cui) - implement Mediatek chained IRQ handling (Honghui Zhang) - fix vendor ID & class type for Mediatek MT7622 (Honghui Zhang) - add Mobiveil PCIe host controller driver (Subrahmanya Lingappa) - add Mobiveil MSI support (Subrahmanya Lingappa) - clean up clocks, MSI, IRQ mappings in R-Car probe failure paths (Marek Vasut) - poll more frequently (5us vs 5ms) while waiting for R-Car data link active (Marek Vasut) - use generic OF parsing interface in R-Car (Vladimir Zapolskiy) - add R-Car V3H (R8A77980) "compatible" string (Sergei Shtylyov) - add R-Car gen3 PHY support (Sergei Shtylyov) - improve R-Car PHYRDY polling (Sergei Shtylyov) - clean up R-Car macros (Marek Vasut) - use runtime PM for R-Car controller clock (Dien Pham) - update arm64 defconfig for Rockchip (Shawn Lin) - refactor Rockchip code to facilitate both root port and endpoint mode (Shawn Lin) - add Rockchip endpoint mode driver (Shawn Lin) - support VMD "membar shadow" feature (Jon Derrick) - support VMD bus number offsets (Jon Derrick) - add VMD "no AER source ID" quirk for more device IDs (Jon Derrick) - remove unnecessary host controller CONFIG_PCIEPORTBUS Kconfig selections (Bjorn Helgaas) - clean up quirks.c organization and whitespace (Bjorn Helgaas) * tag 'pci-v4.18-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (144 commits) PCI/AER: Replace struct pcie_device with pci_dev PCI/AER: Remove unused parameters PCI: qcom: Include gpio/consumer.h PCI: Improve "partially hidden behind bridge" log message PCI: Improve pci_scan_bridge() and pci_scan_bridge_extend() doc PCI: Move resource distribution for single bridge outside loop PCI: Account for all bridges on bus when distributing bus numbers ACPI / hotplug / PCI: Drop unnecessary parentheses ACPI / hotplug / PCI: Mark stale PCI devices disconnected ACPI / hotplug / PCI: Don't scan bridges managed by native hotplug PCI: hotplug: Add hotplug_is_native() PCI: shpchp: Add shpchp_is_native() PCI: shpchp: Fix AMD POGO identification PCI: mobiveil: Add MSI support PCI: mobiveil: Add Mobiveil PCIe Host Bridge IP driver PCI/AER: Decode Error Source Requester ID PCI/AER: Remove aer_recover_work_func() forward declaration PCI/DPC: Use the generic pcie_do_fatal_recovery() path PCI/AER: Pass service type to pcie_do_fatal_recovery() PCI/DPC: Disable ERR_NONFATAL handling by DPC ...
Diffstat (limited to 'drivers/pci/pcie/err.c')
-rw-r--r--drivers/pci/pcie/err.c388
1 files changed, 388 insertions, 0 deletions
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
new file mode 100644
index 000000000000..f7ce0cb0b0b7
--- /dev/null
+++ b/drivers/pci/pcie/err.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file implements the error recovery as a core part of PCIe error
+ * reporting. When a PCIe error is delivered, an error message will be
+ * collected and printed to console, then, an error recovery procedure
+ * will be executed by following the PCI error recovery rules.
+ *
+ * Copyright (C) 2006 Intel Corp.
+ * Tom Long Nguyen (tom.l.nguyen@intel.com)
+ * Zhang Yanmin (yanmin.zhang@intel.com)
+ */
+
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/aer.h>
+#include "portdrv.h"
+#include "../pci.h"
+
+struct aer_broadcast_data {
+ enum pci_channel_state state;
+ enum pci_ers_result result;
+};
+
+static pci_ers_result_t merge_result(enum pci_ers_result orig,
+ enum pci_ers_result new)
+{
+ if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
+ return PCI_ERS_RESULT_NO_AER_DRIVER;
+
+ if (new == PCI_ERS_RESULT_NONE)
+ return orig;
+
+ switch (orig) {
+ case PCI_ERS_RESULT_CAN_RECOVER:
+ case PCI_ERS_RESULT_RECOVERED:
+ orig = new;
+ break;
+ case PCI_ERS_RESULT_DISCONNECT:
+ if (new == PCI_ERS_RESULT_NEED_RESET)
+ orig = PCI_ERS_RESULT_NEED_RESET;
+ break;
+ default:
+ break;
+ }
+
+ return orig;
+}
+
+static int report_error_detected(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote;
+ const struct pci_error_handlers *err_handler;
+ struct aer_broadcast_data *result_data;
+
+ result_data = (struct aer_broadcast_data *) data;
+
+ device_lock(&dev->dev);
+ dev->error_state = result_data->state;
+
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->error_detected) {
+ if (result_data->state == pci_channel_io_frozen &&
+ dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
+ /*
+ * In case of fatal recovery, if one of down-
+ * stream device has no driver. We might be
+ * unable to recover because a later insmod
+ * of a driver for this device is unaware of
+ * its hw state.
+ */
+ pci_printk(KERN_DEBUG, dev, "device has %s\n",
+ dev->driver ?
+ "no AER-aware driver" : "no driver");
+ }
+
+ /*
+ * If there's any device in the subtree that does not
+ * have an error_detected callback, returning
+ * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
+ * the subsequent mmio_enabled/slot_reset/resume
+ * callbacks of "any" device in the subtree. All the
+ * devices in the subtree are left in the error state
+ * without recovery.
+ */
+
+ if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
+ vote = PCI_ERS_RESULT_NO_AER_DRIVER;
+ else
+ vote = PCI_ERS_RESULT_NONE;
+ } else {
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->error_detected(dev, result_data->state);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+ }
+
+ result_data->result = merge_result(result_data->result, vote);
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int report_mmio_enabled(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote;
+ const struct pci_error_handlers *err_handler;
+ struct aer_broadcast_data *result_data;
+
+ result_data = (struct aer_broadcast_data *) data;
+
+ device_lock(&dev->dev);
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->mmio_enabled)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->mmio_enabled(dev);
+ result_data->result = merge_result(result_data->result, vote);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int report_slot_reset(struct pci_dev *dev, void *data)
+{
+ pci_ers_result_t vote;
+ const struct pci_error_handlers *err_handler;
+ struct aer_broadcast_data *result_data;
+
+ result_data = (struct aer_broadcast_data *) data;
+
+ device_lock(&dev->dev);
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->slot_reset)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ vote = err_handler->slot_reset(dev);
+ result_data->result = merge_result(result_data->result, vote);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+static int report_resume(struct pci_dev *dev, void *data)
+{
+ const struct pci_error_handlers *err_handler;
+
+ device_lock(&dev->dev);
+ dev->error_state = pci_channel_io_normal;
+
+ if (!dev->driver ||
+ !dev->driver->err_handler ||
+ !dev->driver->err_handler->resume)
+ goto out;
+
+ err_handler = dev->driver->err_handler;
+ err_handler->resume(dev);
+ pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+out:
+ device_unlock(&dev->dev);
+ return 0;
+}
+
+/**
+ * default_reset_link - default reset function
+ * @dev: pointer to pci_dev data structure
+ *
+ * Invoked when performing link reset on a Downstream Port or a
+ * Root Port with no aer driver.
+ */
+static pci_ers_result_t default_reset_link(struct pci_dev *dev)
+{
+ pci_reset_bridge_secondary_bus(dev);
+ pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
+{
+ struct pci_dev *udev;
+ pci_ers_result_t status;
+ struct pcie_port_service_driver *driver = NULL;
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ /* Reset this port for all subordinates */
+ udev = dev;
+ } else {
+ /* Reset the upstream component (likely downstream port) */
+ udev = dev->bus->self;
+ }
+
+ /* Use the aer driver of the component firstly */
+ driver = pcie_port_find_service(udev, service);
+
+ if (driver && driver->reset_link) {
+ status = driver->reset_link(udev);
+ } else if (udev->has_secondary_link) {
+ status = default_reset_link(udev);
+ } else {
+ pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
+ pci_name(udev));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ if (status != PCI_ERS_RESULT_RECOVERED) {
+ pci_printk(KERN_DEBUG, dev, "link reset at upstream device %s failed\n",
+ pci_name(udev));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ return status;
+}
+
+/**
+ * broadcast_error_message - handle message broadcast to downstream drivers
+ * @dev: pointer to from where in a hierarchy message is broadcasted down
+ * @state: error state
+ * @error_mesg: message to print
+ * @cb: callback to be broadcasted
+ *
+ * Invoked during error recovery process. Once being invoked, the content
+ * of error severity will be broadcasted to all downstream drivers in a
+ * hierarchy in question.
+ */
+static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
+ enum pci_channel_state state,
+ char *error_mesg,
+ int (*cb)(struct pci_dev *, void *))
+{
+ struct aer_broadcast_data result_data;
+
+ pci_printk(KERN_DEBUG, dev, "broadcast %s message\n", error_mesg);
+ result_data.state = state;
+ if (cb == report_error_detected)
+ result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
+ else
+ result_data.result = PCI_ERS_RESULT_RECOVERED;
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ /*
+ * If the error is reported by a bridge, we think this error
+ * is related to the downstream link of the bridge, so we
+ * do error recovery on all subordinates of the bridge instead
+ * of the bridge and clear the error status of the bridge.
+ */
+ if (cb == report_error_detected)
+ dev->error_state = state;
+ pci_walk_bus(dev->subordinate, cb, &result_data);
+ if (cb == report_resume) {
+ pci_cleanup_aer_uncorrect_error_status(dev);
+ dev->error_state = pci_channel_io_normal;
+ }
+ } else {
+ /*
+ * If the error is reported by an end point, we think this
+ * error is related to the upstream link of the end point.
+ */
+ if (state == pci_channel_io_normal)
+ /*
+ * the error is non fatal so the bus is ok, just invoke
+ * the callback for the function that logged the error.
+ */
+ cb(dev, &result_data);
+ else
+ pci_walk_bus(dev->bus, cb, &result_data);
+ }
+
+ return result_data.result;
+}
+
+/**
+ * pcie_do_fatal_recovery - handle fatal error recovery process
+ * @dev: pointer to a pci_dev data structure of agent detecting an error
+ *
+ * Invoked when an error is fatal. Once being invoked, removes the devices
+ * beneath this AER agent, followed by reset link e.g. secondary bus reset
+ * followed by re-enumeration of devices.
+ */
+void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
+{
+ struct pci_dev *udev;
+ struct pci_bus *parent;
+ struct pci_dev *pdev, *temp;
+ pci_ers_result_t result;
+
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+ udev = dev;
+ else
+ udev = dev->bus->self;
+
+ parent = udev->subordinate;
+ pci_lock_rescan_remove();
+ list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
+ bus_list) {
+ pci_dev_get(pdev);
+ pci_dev_set_disconnected(pdev, NULL);
+ if (pci_has_subordinate(pdev))
+ pci_walk_bus(pdev->subordinate,
+ pci_dev_set_disconnected, NULL);
+ pci_stop_and_remove_bus_device(pdev);
+ pci_dev_put(pdev);
+ }
+
+ result = reset_link(udev, service);
+
+ if ((service == PCIE_PORT_SERVICE_AER) &&
+ (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)) {
+ /*
+ * If the error is reported by a bridge, we think this error
+ * is related to the downstream link of the bridge, so we
+ * do error recovery on all subordinates of the bridge instead
+ * of the bridge and clear the error status of the bridge.
+ */
+ pci_cleanup_aer_uncorrect_error_status(dev);
+ }
+
+ if (result == PCI_ERS_RESULT_RECOVERED) {
+ if (pcie_wait_for_link(udev, true))
+ pci_rescan_bus(udev->bus);
+ pci_info(dev, "Device recovery from fatal error successful\n");
+ } else {
+ pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+ pci_info(dev, "Device recovery from fatal error failed\n");
+ }
+
+ pci_unlock_rescan_remove();
+}
+
+/**
+ * pcie_do_nonfatal_recovery - handle nonfatal error recovery process
+ * @dev: pointer to a pci_dev data structure of agent detecting an error
+ *
+ * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
+ * error detected message to all downstream drivers within a hierarchy in
+ * question and return the returned code.
+ */
+void pcie_do_nonfatal_recovery(struct pci_dev *dev)
+{
+ pci_ers_result_t status;
+ enum pci_channel_state state;
+
+ state = pci_channel_io_normal;
+
+ status = broadcast_error_message(dev,
+ state,
+ "error_detected",
+ report_error_detected);
+
+ if (status == PCI_ERS_RESULT_CAN_RECOVER)
+ status = broadcast_error_message(dev,
+ state,
+ "mmio_enabled",
+ report_mmio_enabled);
+
+ if (status == PCI_ERS_RESULT_NEED_RESET) {
+ /*
+ * TODO: Should call platform-specific
+ * functions to reset slot before calling
+ * drivers' slot_reset callbacks?
+ */
+ status = broadcast_error_message(dev,
+ state,
+ "slot_reset",
+ report_slot_reset);
+ }
+
+ if (status != PCI_ERS_RESULT_RECOVERED)
+ goto failed;
+
+ broadcast_error_message(dev,
+ state,
+ "resume",
+ report_resume);
+
+ pci_info(dev, "AER: Device recovery successful\n");
+ return;
+
+failed:
+ pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+
+ /* TODO: Should kernel panic here? */
+ pci_info(dev, "AER: Device recovery failed\n");
+}