aboutsummaryrefslogtreecommitdiff
path: root/drivers/pci/controller/pci-hyperv.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/controller/pci-hyperv.c')
-rw-r--r--drivers/pci/controller/pci-hyperv.c220
1 files changed, 141 insertions, 79 deletions
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 6511648271b2..eaec915ffe62 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -40,6 +40,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pci-ecam.h>
#include <linux/delay.h>
#include <linux/semaphore.h>
#include <linux/irqdomain.h>
@@ -64,6 +65,7 @@ enum pci_protocol_version_t {
PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1), /* Win10 */
PCI_PROTOCOL_VERSION_1_2 = PCI_MAKE_VERSION(1, 2), /* RS1 */
PCI_PROTOCOL_VERSION_1_3 = PCI_MAKE_VERSION(1, 3), /* Vibranium */
+ PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4), /* WS2022 */
};
#define CPU_AFFINITY_ALL -1ULL
@@ -73,6 +75,7 @@ enum pci_protocol_version_t {
* first.
*/
static enum pci_protocol_version_t pci_protocol_versions[] = {
+ PCI_PROTOCOL_VERSION_1_4,
PCI_PROTOCOL_VERSION_1_3,
PCI_PROTOCOL_VERSION_1_2,
PCI_PROTOCOL_VERSION_1_1,
@@ -122,6 +125,8 @@ enum pci_message_type {
PCI_CREATE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x17,
PCI_DELETE_INTERRUPT_MESSAGE2 = PCI_MESSAGE_BASE + 0x18, /* unused */
PCI_BUS_RELATIONS2 = PCI_MESSAGE_BASE + 0x19,
+ PCI_RESOURCES_ASSIGNED3 = PCI_MESSAGE_BASE + 0x1A,
+ PCI_CREATE_INTERRUPT_MESSAGE3 = PCI_MESSAGE_BASE + 0x1B,
PCI_MESSAGE_MAXIMUM
};
@@ -235,6 +240,21 @@ struct hv_msi_desc2 {
u16 processor_array[32];
} __packed;
+/*
+ * struct hv_msi_desc3 - 1.3 version of hv_msi_desc
+ * Everything is the same as in 'hv_msi_desc2' except that the size of the
+ * 'vector' field is larger to support bigger vector values. For ex: LPI
+ * vectors on ARM.
+ */
+struct hv_msi_desc3 {
+ u32 vector;
+ u8 delivery_mode;
+ u8 reserved;
+ u16 vector_count;
+ u16 processor_count;
+ u16 processor_array[32];
+} __packed;
+
/**
* struct tran_int_desc
* @reserved: unused, padding
@@ -383,6 +403,12 @@ struct pci_create_interrupt2 {
struct hv_msi_desc2 int_desc;
} __packed;
+struct pci_create_interrupt3 {
+ struct pci_message message_type;
+ union win_slot_encoding wslot;
+ struct hv_msi_desc3 int_desc;
+} __packed;
+
struct pci_delete_interrupt {
struct pci_message message_type;
union win_slot_encoding wslot;
@@ -444,16 +470,20 @@ enum hv_pcibus_state {
hv_pcibus_probed,
hv_pcibus_installed,
hv_pcibus_removing,
- hv_pcibus_removed,
hv_pcibus_maximum
};
struct hv_pcibus_device {
+#ifdef CONFIG_X86
struct pci_sysdata sysdata;
+#elif defined(CONFIG_ARM64)
+ struct pci_config_window sysdata;
+#endif
+ struct pci_host_bridge *bridge;
+ struct fwnode_handle *fwnode;
/* Protocol version negotiated with the host */
enum pci_protocol_version_t protocol_version;
enum hv_pcibus_state state;
- refcount_t remove_lock;
struct hv_device *hdev;
resource_size_t low_mmio_space;
resource_size_t high_mmio_space;
@@ -461,14 +491,11 @@ struct hv_pcibus_device {
struct resource *low_mmio_res;
struct resource *high_mmio_res;
struct completion *survey_event;
- struct completion remove_event;
struct pci_bus *pci_bus;
spinlock_t config_lock; /* Avoid two threads writing index page */
spinlock_t device_list_lock; /* Protect lists below */
void __iomem *cfg_addr;
- struct list_head resources_for_children;
-
struct list_head children;
struct list_head dr_list;
@@ -593,9 +620,6 @@ static void put_pcichild(struct hv_pci_dev *hpdev)
kfree(hpdev);
}
-static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
-static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
-
/*
* There is no good way to get notified from vmbus_onoffer_rescind(),
* so let's use polling here, since this is not a hot path.
@@ -1334,6 +1358,15 @@ static u32 hv_compose_msi_req_v1(
return sizeof(*int_pkt);
}
+/*
+ * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
+ * by subsequent retarget in hv_irq_unmask().
+ */
+static int hv_compose_msi_req_get_cpu(struct cpumask *affinity)
+{
+ return cpumask_first_and(affinity, cpu_online_mask);
+}
+
static u32 hv_compose_msi_req_v2(
struct pci_create_interrupt2 *int_pkt, struct cpumask *affinity,
u32 slot, u8 vector)
@@ -1345,12 +1378,27 @@ static u32 hv_compose_msi_req_v2(
int_pkt->int_desc.vector = vector;
int_pkt->int_desc.vector_count = 1;
int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ cpu = hv_compose_msi_req_get_cpu(affinity);
+ int_pkt->int_desc.processor_array[0] =
+ hv_cpu_number_to_vp_number(cpu);
+ int_pkt->int_desc.processor_count = 1;
- /*
- * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
- * by subsequent retarget in hv_irq_unmask().
- */
- cpu = cpumask_first_and(affinity, cpu_online_mask);
+ return sizeof(*int_pkt);
+}
+
+static u32 hv_compose_msi_req_v3(
+ struct pci_create_interrupt3 *int_pkt, struct cpumask *affinity,
+ u32 slot, u32 vector)
+{
+ int cpu;
+
+ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
+ int_pkt->wslot.slot = slot;
+ int_pkt->int_desc.vector = vector;
+ int_pkt->int_desc.reserved = 0;
+ int_pkt->int_desc.vector_count = 1;
+ int_pkt->int_desc.delivery_mode = APIC_DELIVERY_MODE_FIXED;
+ cpu = hv_compose_msi_req_get_cpu(affinity);
int_pkt->int_desc.processor_array[0] =
hv_cpu_number_to_vp_number(cpu);
int_pkt->int_desc.processor_count = 1;
@@ -1385,6 +1433,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
union {
struct pci_create_interrupt v1;
struct pci_create_interrupt2 v2;
+ struct pci_create_interrupt3 v3;
} int_pkts;
} __packed ctxt;
@@ -1432,6 +1481,13 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
cfg->vector);
break;
+ case PCI_PROTOCOL_VERSION_1_4:
+ size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
+ dest,
+ hpdev->desc.win_slot.slot,
+ cfg->vector);
+ break;
+
default:
/* As we only negotiate protocol versions known to this driver,
* this path should never hit. However, this is it not a hot
@@ -1572,7 +1628,7 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
hbus->msi_info.handler = handle_edge_irq;
hbus->msi_info.handler_name = "edge";
hbus->msi_info.data = hbus;
- hbus->irq_domain = pci_msi_create_irq_domain(hbus->sysdata.fwnode,
+ hbus->irq_domain = pci_msi_create_irq_domain(hbus->fwnode,
&hbus->msi_info,
x86_vector_domain);
if (!hbus->irq_domain) {
@@ -1581,6 +1637,8 @@ static int hv_pcie_init_irq_domain(struct hv_pcibus_device *hbus)
return -ENODEV;
}
+ dev_set_msi_domain(&hbus->bridge->dev, hbus->irq_domain);
+
return 0;
}
@@ -1803,7 +1861,7 @@ static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
- hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr,
+ hpdev->pci_slot = pci_create_slot(hbus->bridge->bus, slot_nr,
name, NULL);
if (IS_ERR(hpdev->pci_slot)) {
pr_warn("pci_create slot %s failed\n", name);
@@ -1833,7 +1891,7 @@ static void hv_pci_remove_slots(struct hv_pcibus_device *hbus)
static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
{
struct pci_dev *dev;
- struct pci_bus *bus = hbus->pci_bus;
+ struct pci_bus *bus = hbus->bridge->bus;
struct hv_pci_dev *hv_dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -1856,21 +1914,22 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus)
*/
static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
{
- /* Register the device */
- hbus->pci_bus = pci_create_root_bus(&hbus->hdev->device,
- 0, /* bus number is always zero */
- &hv_pcifront_ops,
- &hbus->sysdata,
- &hbus->resources_for_children);
- if (!hbus->pci_bus)
- return -ENODEV;
+ int error;
+ struct pci_host_bridge *bridge = hbus->bridge;
+
+ bridge->dev.parent = &hbus->hdev->device;
+ bridge->sysdata = &hbus->sysdata;
+ bridge->ops = &hv_pcifront_ops;
+
+ error = pci_scan_root_bus_bridge(bridge);
+ if (error)
+ return error;
pci_lock_rescan_remove();
- pci_scan_child_bus(hbus->pci_bus);
hv_pci_assign_numa_node(hbus);
- pci_bus_assign_resources(hbus->pci_bus);
+ pci_bus_assign_resources(bridge->bus);
hv_pci_assign_slots(hbus);
- pci_bus_add_devices(hbus->pci_bus);
+ pci_bus_add_devices(bridge->bus);
pci_unlock_rescan_remove();
hbus->state = hv_pcibus_installed;
return 0;
@@ -2064,10 +2123,8 @@ static void pci_devices_present_work(struct work_struct *work)
}
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
- if (!dr) {
- put_hvpcibus(hbus);
+ if (!dr)
return;
- }
/* First, mark all existing children as reported missing. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
@@ -2135,7 +2192,7 @@ static void pci_devices_present_work(struct work_struct *work)
* because there may have been changes.
*/
pci_lock_rescan_remove();
- pci_scan_child_bus(hbus->pci_bus);
+ pci_scan_child_bus(hbus->bridge->bus);
hv_pci_assign_numa_node(hbus);
hv_pci_assign_slots(hbus);
pci_unlock_rescan_remove();
@@ -2150,7 +2207,6 @@ static void pci_devices_present_work(struct work_struct *work)
break;
}
- put_hvpcibus(hbus);
kfree(dr);
}
@@ -2191,12 +2247,10 @@ static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus,
list_add_tail(&dr->list_entry, &hbus->dr_list);
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
- if (pending_dr) {
+ if (pending_dr)
kfree(dr_wrk);
- } else {
- get_hvpcibus(hbus);
+ else
queue_work(hbus->wq, &dr_wrk->wrk);
- }
return 0;
}
@@ -2306,11 +2360,11 @@ static void hv_eject_device_work(struct work_struct *work)
/*
* Ejection can come before or after the PCI bus has been set up, so
* attempt to find it and tear down the bus state, if it exists. This
- * must be done without constructs like pci_domain_nr(hbus->pci_bus)
- * because hbus->pci_bus may not exist yet.
+ * must be done without constructs like pci_domain_nr(hbus->bridge->bus)
+ * because hbus->bridge->bus may not exist yet.
*/
wslot = wslot_to_devfn(hpdev->desc.win_slot.slot);
- pdev = pci_get_domain_bus_and_slot(hbus->sysdata.domain, 0, wslot);
+ pdev = pci_get_domain_bus_and_slot(hbus->bridge->domain_nr, 0, wslot);
if (pdev) {
pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(pdev);
@@ -2339,8 +2393,6 @@ static void hv_eject_device_work(struct work_struct *work)
put_pcichild(hpdev);
put_pcichild(hpdev);
/* hpdev has been freed. Do not use it any more. */
-
- put_hvpcibus(hbus);
}
/**
@@ -2364,7 +2416,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
hpdev->state = hv_pcichild_ejecting;
get_pcichild(hpdev);
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
- get_hvpcibus(hbus);
queue_work(hbus->wq, &hpdev->wrk);
}
@@ -2676,8 +2727,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus)
/* Modify this resource to become a bridge window. */
hbus->low_mmio_res->flags |= IORESOURCE_WINDOW;
hbus->low_mmio_res->flags &= ~IORESOURCE_BUSY;
- pci_add_resource(&hbus->resources_for_children,
- hbus->low_mmio_res);
+ pci_add_resource(&hbus->bridge->windows, hbus->low_mmio_res);
}
if (hbus->high_mmio_space) {
@@ -2696,8 +2746,7 @@ static int hv_pci_allocate_bridge_windows(struct hv_pcibus_device *hbus)
/* Modify this resource to become a bridge window. */
hbus->high_mmio_res->flags |= IORESOURCE_WINDOW;
hbus->high_mmio_res->flags &= ~IORESOURCE_BUSY;
- pci_add_resource(&hbus->resources_for_children,
- hbus->high_mmio_res);
+ pci_add_resource(&hbus->bridge->windows, hbus->high_mmio_res);
}
return 0;
@@ -2964,17 +3013,6 @@ static int hv_send_resources_released(struct hv_device *hdev)
return 0;
}
-static void get_hvpcibus(struct hv_pcibus_device *hbus)
-{
- refcount_inc(&hbus->remove_lock);
-}
-
-static void put_hvpcibus(struct hv_pcibus_device *hbus)
-{
- if (refcount_dec_and_test(&hbus->remove_lock))
- complete(&hbus->remove_event);
-}
-
#define HVPCI_DOM_MAP_SIZE (64 * 1024)
static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE);
@@ -3027,6 +3065,7 @@ static void hv_put_dom_num(u16 dom)
static int hv_pci_probe(struct hv_device *hdev,
const struct hv_vmbus_device_id *dev_id)
{
+ struct pci_host_bridge *bridge;
struct hv_pcibus_device *hbus;
u16 dom_req, dom;
char *name;
@@ -3039,6 +3078,10 @@ static int hv_pci_probe(struct hv_device *hdev,
*/
BUILD_BUG_ON(sizeof(*hbus) > HV_HYP_PAGE_SIZE);
+ bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
+ if (!bridge)
+ return -ENOMEM;
+
/*
* With the recent 59bb47985c1d ("mm, sl[aou]b: guarantee natural
* alignment for kmalloc(power-of-two)"), kzalloc() is able to allocate
@@ -3060,6 +3103,8 @@ static int hv_pci_probe(struct hv_device *hdev,
hbus = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
if (!hbus)
return -ENOMEM;
+
+ hbus->bridge = bridge;
hbus->state = hv_pcibus_init;
hbus->wslot_res_allocated = -1;
@@ -3091,19 +3136,19 @@ static int hv_pci_probe(struct hv_device *hdev,
"PCI dom# 0x%hx has collision, using 0x%hx",
dom_req, dom);
+ hbus->bridge->domain_nr = dom;
+#ifdef CONFIG_X86
hbus->sysdata.domain = dom;
+#endif
hbus->hdev = hdev;
- refcount_set(&hbus->remove_lock, 1);
INIT_LIST_HEAD(&hbus->children);
INIT_LIST_HEAD(&hbus->dr_list);
- INIT_LIST_HEAD(&hbus->resources_for_children);
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
spin_lock_init(&hbus->retarget_msi_interrupt_lock);
- init_completion(&hbus->remove_event);
hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
- hbus->sysdata.domain);
+ hbus->bridge->domain_nr);
if (!hbus->wq) {
ret = -ENOMEM;
goto free_dom;
@@ -3140,9 +3185,9 @@ static int hv_pci_probe(struct hv_device *hdev,
goto unmap;
}
- hbus->sysdata.fwnode = irq_domain_alloc_named_fwnode(name);
+ hbus->fwnode = irq_domain_alloc_named_fwnode(name);
kfree(name);
- if (!hbus->sysdata.fwnode) {
+ if (!hbus->fwnode) {
ret = -ENOMEM;
goto unmap;
}
@@ -3220,7 +3265,7 @@ exit_d0:
free_irq_domain:
irq_domain_remove(hbus->irq_domain);
free_fwnode:
- irq_domain_free_fwnode(hbus->sysdata.fwnode);
+ irq_domain_free_fwnode(hbus->fwnode);
unmap:
iounmap(hbus->cfg_addr);
free_config:
@@ -3230,7 +3275,7 @@ close:
destroy_wq:
destroy_workqueue(hbus->wq);
free_dom:
- hv_put_dom_num(hbus->sysdata.domain);
+ hv_put_dom_num(hbus->bridge->domain_nr);
free_bus:
kfree(hbus);
return ret;
@@ -3243,8 +3288,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
struct pci_packet teardown_packet;
u8 buffer[sizeof(struct pci_message)];
} pkt;
- struct hv_dr_state *dr;
struct hv_pci_compl comp_pkt;
+ struct hv_pci_dev *hpdev, *tmp;
+ unsigned long flags;
int ret;
/*
@@ -3256,9 +3302,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
if (!keep_devs) {
/* Delete any children which might still exist. */
- dr = kzalloc(sizeof(*dr), GFP_KERNEL);
- if (dr && hv_pci_start_relations_work(hbus, dr))
- kfree(dr);
+ spin_lock_irqsave(&hbus->device_list_lock, flags);
+ list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) {
+ list_del(&hpdev->list_entry);
+ if (hpdev->pci_slot)
+ pci_destroy_slot(hpdev->pci_slot);
+ /* For the two refs got in new_pcichild_device() */
+ put_pcichild(hpdev);
+ put_pcichild(hpdev);
+ }
+ spin_unlock_irqrestore(&hbus->device_list_lock, flags);
}
ret = hv_send_resources_released(hdev);
@@ -3301,13 +3354,23 @@ static int hv_pci_remove(struct hv_device *hdev)
hbus = hv_get_drvdata(hdev);
if (hbus->state == hv_pcibus_installed) {
+ tasklet_disable(&hdev->channel->callback_event);
+ hbus->state = hv_pcibus_removing;
+ tasklet_enable(&hdev->channel->callback_event);
+ destroy_workqueue(hbus->wq);
+ hbus->wq = NULL;
+ /*
+ * At this point, no work is running or can be scheduled
+ * on hbus-wq. We can't race with hv_pci_devices_present()
+ * or hv_pci_eject_device(), it's safe to proceed.
+ */
+
/* Remove the bus from PCI's point of view. */
pci_lock_rescan_remove();
- pci_stop_root_bus(hbus->pci_bus);
+ pci_stop_root_bus(hbus->bridge->bus);
hv_pci_remove_slots(hbus);
- pci_remove_root_bus(hbus->pci_bus);
+ pci_remove_root_bus(hbus->bridge->bus);
pci_unlock_rescan_remove();
- hbus->state = hv_pcibus_removed;
}
ret = hv_pci_bus_exit(hdev, false);
@@ -3316,15 +3379,11 @@ static int hv_pci_remove(struct hv_device *hdev)
iounmap(hbus->cfg_addr);
hv_free_config_window(hbus);
- pci_free_resource_list(&hbus->resources_for_children);
hv_pci_free_bridge_windows(hbus);
irq_domain_remove(hbus->irq_domain);
- irq_domain_free_fwnode(hbus->sysdata.fwnode);
- put_hvpcibus(hbus);
- wait_for_completion(&hbus->remove_event);
- destroy_workqueue(hbus->wq);
+ irq_domain_free_fwnode(hbus->fwnode);
- hv_put_dom_num(hbus->sysdata.domain);
+ hv_put_dom_num(hbus->bridge->domain_nr);
kfree(hbus);
return ret;
@@ -3402,7 +3461,7 @@ static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg)
*/
static void hv_pci_restore_msi_state(struct hv_pcibus_device *hbus)
{
- pci_walk_bus(hbus->pci_bus, hv_pci_restore_msi_msg, NULL);
+ pci_walk_bus(hbus->bridge->bus, hv_pci_restore_msi_msg, NULL);
}
static int hv_pci_resume(struct hv_device *hdev)
@@ -3476,6 +3535,9 @@ static void __exit exit_hv_pci_drv(void)
static int __init init_hv_pci_drv(void)
{
+ if (!hv_is_hyperv_initialized())
+ return -ENODEV;
+
/* Set the invalid domain number's bit, so it will not be used */
set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);