From 88801d043b1d16caae76a5e2e5991e8b1f55ce7f Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:35 +0800 Subject: xen/pci: Add a function to reset device for xen When device on dom0 side has been reset, the vpci on Xen side won't get notification, so that the cached state in vpci is all out of date with the real device state. To solve that problem, add a new function to clear all vpci device state when device is reset on dom0 side. And call that function in pcistub_init_device. Because when using "pci-assignable-add" to assign a passthrough device in Xen, it will reset passthrough device and the vpci state will out of date, and then device will fail to restore bar state. Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-2-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- drivers/xen/pci.c | 13 +++++++++++++ drivers/xen/xen-pciback/pci_stub.c | 18 +++++++++++++++--- include/xen/interface/physdev.h | 17 +++++++++++++++++ include/xen/pci.h | 6 ++++++ 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index a2facd8f7e51..416f231809cb 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -173,6 +173,19 @@ static int xen_remove_device(struct device *dev) return r; } +int xen_reset_device(const struct pci_dev *dev) +{ + struct pci_device_reset device = { + .dev.seg = pci_domain_nr(dev->bus), + .dev.bus = dev->bus->number, + .dev.devfn = dev->devfn, + .flags = PCI_DEVICE_RESET_FLR, + }; + + return HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_reset, &device); +} +EXPORT_SYMBOL_GPL(xen_reset_device); + static int xen_pci_notifier(struct notifier_block *nb, unsigned long action, void *data) { diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 4faebbb84999..3e162c1753e2 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -89,6 +89,16 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) return psdev; } +static int pcistub_reset_device_state(struct pci_dev *dev) +{ + __pci_reset_function_locked(dev); + + if (!xen_pv_domain()) + return xen_reset_device(dev); + else + return 0; +} + /* Don't call this directly as it's called by pcistub_device_put */ static void pcistub_device_release(struct kref *kref) { @@ -107,7 +117,7 @@ static void pcistub_device_release(struct kref *kref) /* Call the reset function which does not take lock as this * is called from "unbind" which takes a device_lock mutex. */ - __pci_reset_function_locked(dev); + pcistub_reset_device_state(dev); if (dev_data && pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) dev_info(&dev->dev, "Could not reload PCI state\n"); @@ -284,7 +294,7 @@ void pcistub_put_pci_dev(struct pci_dev *dev) * (so it's ready for the next domain) */ device_lock_assert(&dev->dev); - __pci_reset_function_locked(dev); + pcistub_reset_device_state(dev); dev_data = pci_get_drvdata(dev); ret = pci_load_saved_state(dev, dev_data->pci_saved_state); @@ -420,7 +430,9 @@ static int pcistub_init_device(struct pci_dev *dev) dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); else { dev_dbg(&dev->dev, "resetting (FLR, D3, etc) the device\n"); - __pci_reset_function_locked(dev); + err = pcistub_reset_device_state(dev); + if (err) + goto config_release; pci_restore_state(dev); } /* Now disable the device (this also ensures some private device diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index a237af867873..df74e65a884b 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -256,6 +256,13 @@ struct physdev_pci_device_add { */ #define PHYSDEVOP_prepare_msix 30 #define PHYSDEVOP_release_msix 31 +/* + * Notify the hypervisor that a PCI device has been reset, so that any + * internally cached state is regenerated. Should be called after any + * device reset performed by the hardware domain. + */ +#define PHYSDEVOP_pci_device_reset 32 + struct physdev_pci_device { /* IN */ uint16_t seg; @@ -263,6 +270,16 @@ struct physdev_pci_device { uint8_t devfn; }; +struct pci_device_reset { + struct physdev_pci_device dev; +#define PCI_DEVICE_RESET_COLD 0x0 +#define PCI_DEVICE_RESET_WARM 0x1 +#define PCI_DEVICE_RESET_HOT 0x2 +#define PCI_DEVICE_RESET_FLR 0x3 +#define PCI_DEVICE_RESET_MASK 0x3 + uint32_t flags; +}; + #define PHYSDEVOP_DBGP_RESET_PREPARE 1 #define PHYSDEVOP_DBGP_RESET_DONE 2 diff --git a/include/xen/pci.h b/include/xen/pci.h index b8337cf85fd1..424b8ea89ca8 100644 --- a/include/xen/pci.h +++ b/include/xen/pci.h @@ -4,10 +4,16 @@ #define __XEN_PCI_H__ #if defined(CONFIG_XEN_DOM0) +int xen_reset_device(const struct pci_dev *dev); int xen_find_device_domain_owner(struct pci_dev *dev); int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain); int xen_unregister_device_domain_owner(struct pci_dev *dev); #else +static inline int xen_reset_device(const struct pci_dev *dev) +{ + return -1; +} + static inline int xen_find_device_domain_owner(struct pci_dev *dev) { return -1; -- cgit From b166b8ab4189743a717cb93f50d6fcca3a46770d Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:36 +0800 Subject: xen/pvh: Setup gsi for passthrough device In PVH dom0, the gsis don't get registered, but the gsi of a passthrough device must be configured for it to be able to be mapped into a domU. When assigning a device to passthrough, proactively setup the gsi of the device during that process. Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-3-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pvh.c | 23 ++++++++++++++++++ drivers/acpi/pci_irq.c | 2 +- drivers/xen/acpi.c | 50 ++++++++++++++++++++++++++++++++++++++ drivers/xen/xen-pciback/pci_stub.c | 20 +++++++++++++++ include/linux/acpi.h | 1 + include/xen/acpi.h | 18 ++++++++++++++ 6 files changed, 113 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 728a4366ca85..bf68c329fc01 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -28,6 +29,28 @@ bool __ro_after_init xen_pvh; EXPORT_SYMBOL_GPL(xen_pvh); +#ifdef CONFIG_XEN_DOM0 +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + int ret; + struct physdev_setup_gsi setup_gsi; + + setup_gsi.gsi = gsi; + setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1); + setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + + ret = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); + if (ret == -EEXIST) { + xen_raw_printk("Already setup the GSI :%d\n", gsi); + ret = 0; + } else if (ret) + xen_raw_printk("Fail to setup GSI (%d)!\n", gsi); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_pvh_setup_gsi); +#endif + /* * Reserve e820 UNUSABLE regions to inflate the memory balloon. * diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index ff30ceca2203..630fe0a34bc6 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -288,7 +288,7 @@ static int acpi_reroute_boot_interrupt(struct pci_dev *dev, } #endif /* CONFIG_X86_IO_APIC */ -static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin) { struct acpi_prt_entry *entry = NULL; struct pci_dev *bridge; diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c index 6893c79fd2a1..9e2096524fbc 100644 --- a/drivers/xen/acpi.c +++ b/drivers/xen/acpi.c @@ -30,6 +30,7 @@ * IN THE SOFTWARE. */ +#include #include #include #include @@ -75,3 +76,52 @@ int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state, return xen_acpi_notify_hypervisor_state(sleep_state, val_a, val_b, true); } + +struct acpi_prt_entry { + struct acpi_pci_id id; + u8 pin; + acpi_handle link; + u32 index; +}; + +int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out) +{ + int gsi; + u8 pin; + struct acpi_prt_entry *entry; + int trigger = ACPI_LEVEL_SENSITIVE; + int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? + ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; + + if (!dev || !gsi_out || !trigger_out || !polarity_out) + return -EINVAL; + + pin = dev->pin; + if (!pin) + return -EINVAL; + + entry = acpi_pci_irq_lookup(dev, pin); + if (entry) { + if (entry->link) + gsi = acpi_pci_link_allocate_irq(entry->link, + entry->index, + &trigger, &polarity, + NULL); + else + gsi = entry->index; + } else + gsi = -1; + + if (gsi < 0) + return -EINVAL; + + *gsi_out = gsi; + *trigger_out = trigger; + *polarity_out = polarity; + + return 0; +} +EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_info); diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 3e162c1753e2..8ce27333f54b 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -21,6 +21,9 @@ #include #include #include +#ifdef CONFIG_XEN_ACPI +#include +#endif #include #include #include "pciback.h" @@ -367,6 +370,9 @@ static int pcistub_match(struct pci_dev *dev) static int pcistub_init_device(struct pci_dev *dev) { struct xen_pcibk_dev_data *dev_data; +#ifdef CONFIG_XEN_ACPI + int gsi, trigger, polarity; +#endif int err = 0; dev_dbg(&dev->dev, "initializing...\n"); @@ -435,6 +441,20 @@ static int pcistub_init_device(struct pci_dev *dev) goto config_release; pci_restore_state(dev); } + +#ifdef CONFIG_XEN_ACPI + if (xen_initial_domain() && xen_pvh_domain()) { + err = xen_acpi_get_gsi_info(dev, &gsi, &trigger, &polarity); + if (err) { + dev_err(&dev->dev, "Fail to get gsi info!\n"); + goto config_release; + } + err = xen_pvh_setup_gsi(gsi, trigger, polarity); + if (err) + goto config_release; + } +#endif + /* Now disable the device (this also ensures some private device * data is setup before we export) */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0687a442fec7..02ded9f53a6b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -362,6 +362,7 @@ void acpi_unregister_gsi (u32 gsi); struct pci_dev; +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin); int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); diff --git a/include/xen/acpi.h b/include/xen/acpi.h index b1e11863144d..3bcfe82d9078 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -67,10 +67,28 @@ static inline void xen_acpi_sleep_register(void) acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel; } } +int xen_pvh_setup_gsi(int gsi, int trigger, int polarity); +int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out); #else static inline void xen_acpi_sleep_register(void) { } + +static inline int xen_pvh_setup_gsi(int gsi, int trigger, int polarity) +{ + return -1; +} + +static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, + int *gsi_out, + int *trigger_out, + int *polarity_out) +{ + return -1; +} #endif #endif /* _XEN_ACPI_H */ -- cgit From 2fae6bb7be320270801b3c3b040189bd7daa8056 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Tue, 24 Sep 2024 14:14:37 +0800 Subject: xen/privcmd: Add new syscall to get gsi from dev On PVH dom0, when passthrough a device to domU, QEMU and xl tools want to use gsi number to do pirq mapping, see QEMU code xen_pt_realize->xc_physdev_map_pirq, and xl code pci_add_dm_done->xc_physdev_map_pirq, but in current codes, the gsi number is got from file /sys/bus/pci/devices//irq, that is wrong, because irq is not equal with gsi, they are in different spaces, so pirq mapping fails. And in current linux codes, there is no method to get gsi for userspace. For above purpose, record gsi of pcistub devices when init pcistub and add a new syscall into privcmd to let userspace can get gsi when they have a need. Signed-off-by: Jiqian Chen Signed-off-by: Huang Rui Signed-off-by: Jiqian Chen Reviewed-by: Stefano Stabellini Message-ID: <20240924061437.2636766-4-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- drivers/xen/Kconfig | 1 + drivers/xen/privcmd.c | 32 ++++++++++++++++++++++++++++++++ drivers/xen/xen-pciback/pci_stub.c | 38 +++++++++++++++++++++++++++++++++++--- include/uapi/xen/privcmd.h | 7 +++++++ include/xen/acpi.h | 9 +++++++++ 5 files changed, 84 insertions(+), 3 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index d5989871dd5d..fd83d51df2f4 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -261,6 +261,7 @@ config XEN_SCSI_BACKEND config XEN_PRIVCMD tristate "Xen hypercall passthrough driver" depends on XEN + imply CONFIG_XEN_PCIDEV_BACKEND default m help The hypercall passthrough driver allows privileged user programs to diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 9563650dfbaf..588f99a2b8df 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -46,6 +46,9 @@ #include #include #include +#ifdef CONFIG_XEN_ACPI +#include +#endif #include "privcmd.h" @@ -844,6 +847,31 @@ out: return rc; } +static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata) +{ +#if defined(CONFIG_XEN_ACPI) + int rc = -EINVAL; + struct privcmd_pcidev_get_gsi kdata; + + if (copy_from_user(&kdata, udata, sizeof(kdata))) + return -EFAULT; + + if (IS_REACHABLE(CONFIG_XEN_PCIDEV_BACKEND)) + rc = pcistub_get_gsi_from_sbdf(kdata.sbdf); + + if (rc < 0) + return rc; + + kdata.gsi = rc; + if (copy_to_user(udata, &kdata, sizeof(kdata))) + return -EFAULT; + + return 0; +#else + return -EINVAL; +#endif +} + #ifdef CONFIG_XEN_PRIVCMD_EVENTFD /* Irqfd support */ static struct workqueue_struct *irqfd_cleanup_wq; @@ -1543,6 +1571,10 @@ static long privcmd_ioctl(struct file *file, ret = privcmd_ioctl_ioeventfd(file, udata); break; + case IOCTL_PRIVCMD_PCIDEV_GET_GSI: + ret = privcmd_ioctl_pcidev_get_gsi(file, udata); + break; + default: break; } diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 8ce27333f54b..d003402ce66b 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -56,6 +56,9 @@ struct pcistub_device { struct pci_dev *dev; struct xen_pcibk_device *pdev;/* non-NULL if struct pci_dev is in use */ +#ifdef CONFIG_XEN_ACPI + int gsi; +#endif }; /* Access to pcistub_devices & seized_devices lists and the initialize_devices @@ -88,6 +91,9 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) kref_init(&psdev->kref); spin_lock_init(&psdev->lock); +#ifdef CONFIG_XEN_ACPI + psdev->gsi = -1; +#endif return psdev; } @@ -220,6 +226,25 @@ static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev, return pci_dev; } +#ifdef CONFIG_XEN_ACPI +int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +{ + struct pcistub_device *psdev; + int domain = (sbdf >> 16) & 0xffff; + int bus = PCI_BUS_NUM(sbdf); + int slot = PCI_SLOT(sbdf); + int func = PCI_FUNC(sbdf); + + psdev = pcistub_device_find(domain, bus, slot, func); + + if (!psdev) + return -ENODEV; + + return psdev->gsi; +} +EXPORT_SYMBOL_GPL(pcistub_get_gsi_from_sbdf); +#endif + struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, int domain, int bus, int slot, int func) @@ -367,14 +392,20 @@ static int pcistub_match(struct pci_dev *dev) return found; } -static int pcistub_init_device(struct pci_dev *dev) +static int pcistub_init_device(struct pcistub_device *psdev) { struct xen_pcibk_dev_data *dev_data; + struct pci_dev *dev; #ifdef CONFIG_XEN_ACPI int gsi, trigger, polarity; #endif int err = 0; + if (!psdev) + return -EINVAL; + + dev = psdev->dev; + dev_dbg(&dev->dev, "initializing...\n"); /* The PCI backend is not intended to be a module (or to work with @@ -452,6 +483,7 @@ static int pcistub_init_device(struct pci_dev *dev) err = xen_pvh_setup_gsi(gsi, trigger, polarity); if (err) goto config_release; + psdev->gsi = gsi; } #endif @@ -494,7 +526,7 @@ static int __init pcistub_init_devices_late(void) spin_unlock_irqrestore(&pcistub_devices_lock, flags); - err = pcistub_init_device(psdev->dev); + err = pcistub_init_device(psdev); if (err) { dev_err(&psdev->dev->dev, "error %d initializing device\n", err); @@ -564,7 +596,7 @@ static int pcistub_seize(struct pci_dev *dev, spin_unlock_irqrestore(&pcistub_devices_lock, flags); /* don't want irqs disabled when calling pcistub_init_device */ - err = pcistub_init_device(psdev->dev); + err = pcistub_init_device(psdev); spin_lock_irqsave(&pcistub_devices_lock, flags); diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 8b8c5d1420fe..8e2c8fd44764 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -126,6 +126,11 @@ struct privcmd_ioeventfd { __u8 pad[2]; }; +struct privcmd_pcidev_get_gsi { + __u32 sbdf; + __u32 gsi; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -157,5 +162,7 @@ struct privcmd_ioeventfd { _IOW('P', 8, struct privcmd_irqfd) #define IOCTL_PRIVCMD_IOEVENTFD \ _IOW('P', 9, struct privcmd_ioeventfd) +#define IOCTL_PRIVCMD_PCIDEV_GET_GSI \ + _IOC(_IOC_NONE, 'P', 10, sizeof(struct privcmd_pcidev_get_gsi)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/acpi.h b/include/xen/acpi.h index 3bcfe82d9078..daa96a22d257 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -91,4 +91,13 @@ static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, } #endif +#ifdef CONFIG_XEN_PCI_STUB +int pcistub_get_gsi_from_sbdf(unsigned int sbdf); +#else +static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +{ + return -1; +} +#endif + #endif /* _XEN_ACPI_H */ -- cgit From 8f2f74f2f3ebd9bb7159301cb7560db75d2e801a Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Wed, 18 Sep 2024 07:36:50 +0800 Subject: xen/pciback: fix cast to restricted pci_ers_result_t and pci_power_t This patch fix the following sparse warning by applying __force cast to pci_ers_result_t and pci_power_t. drivers/xen/xen-pciback/pci_stub.c:760:16: sparse: warning: cast to restricted pci_ers_result_t drivers/xen/xen-pciback/conf_space_capability.c:125:22: sparse: warning: cast to restricted pci_power_t No functional changes intended. Signed-off-by: Min-Hua Chen Reviewed-by: Juergen Gross Message-ID: <20240917233653.61630-1-minhuadotchen@gmail.com> Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/conf_space_capability.c | 2 +- drivers/xen/xen-pciback/pci_stub.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 1948a9700c8f..cf568e899ee2 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -122,7 +122,7 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, if (err) goto out; - new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); + new_state = (__force pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); new_value &= PM_OK_BITS; if ((old_value & PM_OK_BITS) != new_value) { diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index d003402ce66b..2f3da5ac62cd 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -821,7 +821,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, } clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); - res = (pci_ers_result_t)aer_op->err; + res = (__force pci_ers_result_t)aer_op->err; return res; } -- cgit From 08377ed24feef66866d0c6aabcae0aec515cf2ca Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:26 -0400 Subject: xen: sync elfnote.h from xen tree Sync Xen's elfnote.h header from xen.git to pull in the XEN_ELFNOTE_PHYS32_RELOC define. xen commit dfc9fab00378 ("x86/PVH: Support relocatable dom0 kernels") This is a copy except for the removal of the emacs editor config at the end of the file. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-2-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- include/xen/interface/elfnote.h | 93 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 5 deletions(-) diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index 38deb1214613..918f47d87d7a 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -11,7 +11,9 @@ #define __XEN_PUBLIC_ELFNOTE_H__ /* - * The notes should live in a SHT_NOTE segment and have "Xen" in the + * `incontents 200 elfnotes ELF notes + * + * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of @@ -22,6 +24,8 @@ * * String values (for non-legacy) are NULL terminated ASCII, also known * as ASCIZ type. + * + * Xen only uses ELF Notes contained in x86 binaries. */ /* @@ -52,7 +56,7 @@ #define XEN_ELFNOTE_VIRT_BASE 3 /* - * The offset of the ELF paddr field from the acutal required + * The offset of the ELF paddr field from the actual required * pseudo-physical address (numeric). * * This is used to maintain backwards compatibility with older kernels @@ -92,7 +96,12 @@ #define XEN_ELFNOTE_LOADER 8 /* - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be @@ -149,7 +158,9 @@ * The (non-default) location the initial phys-to-machine map should be * placed at by the hypervisor (Dom0) or the tools (DomU). * The kernel must be prepared for this mapping to be established using - * large pages, despite such otherwise not being available to guests. + * large pages, despite such otherwise not being available to guests. Note + * that these large pages may be misaligned in PFN space (they'll obviously + * be aligned in MFN and virtual address spaces). * The kernel must also be able to handle the page table pages used for * this mapping not being accessible through the initial mapping. * (Only x86-64 supports this at present.) @@ -185,9 +196,81 @@ */ #define XEN_ELFNOTE_PHYS32_ENTRY 18 +/* + * Physical loading constraints for PVH kernels + * + * The presence of this note indicates the kernel supports relocating itself. + * + * The note may include up to three 32bit values to place constraints on the + * guest physical loading addresses and alignment for a PVH kernel. Values + * are read in the following order: + * - a required start alignment (default 0x200000) + * - a minimum address for the start of the image (default 0; see below) + * - a maximum address for the last byte of the image (default 0xffffffff) + * + * When this note specifies an alignment value, it is used. Otherwise the + * maximum p_align value from loadable ELF Program Headers is used, if it is + * greater than or equal to 4k (0x1000). Otherwise, the default is used. + */ +#define XEN_ELFNOTE_PHYS32_RELOC 19 + /* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_RELOC + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ -- cgit From 1db29f99edb056d8445876292f53a63459142309 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:27 -0400 Subject: x86/pvh: Make PVH entrypoint PIC for x86-64 The PVH entrypoint is 32bit non-PIC code running the uncompressed vmlinux at its load address CONFIG_PHYSICAL_START - default 0x1000000 (16MB). The kernel is loaded at that physical address inside the VM by the VMM software (Xen/QEMU). When running a Xen PVH Dom0, the host reserved addresses are mapped 1-1 into the PVH container. There exist system firmwares (Coreboot/EDK2) with reserved memory at 16MB. This creates a conflict where the PVH kernel cannot be loaded at that address. Modify the PVH entrypoint to be position-indepedent to allow flexibility in load address. Only the 64bit entry path is converted. A 32bit kernel is not PIC, so calling into other parts of the kernel, like xen_prepare_pvh() and mk_pgtable_32(), don't work properly when relocated. This makes the code PIC, but the page tables need to be updated as well to handle running from the kernel high map. The UNWIND_HINT_END_OF_STACK is to silence: vmlinux.o: warning: objtool: pvh_start_xen+0x7f: unreachable instruction after the lret into 64bit code. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-3-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 46 ++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index f7235ef87bc3..ba4d0eab4436 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -7,6 +7,7 @@ .code32 .text #define _pa(x) ((x) - __START_KERNEL_map) +#define rva(x) ((x) - pvh_start_xen) #include #include @@ -54,7 +55,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen) UNWIND_HINT_END_OF_STACK cld - lgdt (_pa(gdt)) + /* + * See the comment for startup_32 for more details. We need to + * execute a call to get the execution address to be position + * independent, but we don't have a stack. Save and restore the + * magic field of start_info in ebx, and use that as the stack. + */ + mov (%ebx), %eax + leal 4(%ebx), %esp + ANNOTATE_INTRA_FUNCTION_CALL + call 1f +1: popl %ebp + mov %eax, (%ebx) + subl $rva(1b), %ebp + movl $0, %esp + + leal rva(gdt)(%ebp), %eax + leal rva(gdt_start)(%ebp), %ecx + movl %ecx, 2(%eax) + lgdt (%eax) mov $PVH_DS_SEL,%eax mov %eax,%ds @@ -62,14 +81,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen) mov %eax,%ss /* Stash hvm_start_info. */ - mov $_pa(pvh_start_info), %edi + leal rva(pvh_start_info)(%ebp), %edi mov %ebx, %esi - mov _pa(pvh_start_info_sz), %ecx + movl rva(pvh_start_info_sz)(%ebp), %ecx shr $2,%ecx rep movsl - mov $_pa(early_stack_end), %esp + leal rva(early_stack_end)(%ebp), %esp /* Enable PAE mode. */ mov %cr4, %eax @@ -84,30 +103,33 @@ SYM_CODE_START_LOCAL(pvh_start_xen) wrmsr /* Enable pre-constructed page tables. */ - mov $_pa(init_top_pgt), %eax + leal rva(init_top_pgt)(%ebp), %eax mov %eax, %cr3 mov $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 /* Jump to 64-bit mode. */ - ljmp $PVH_CS_SEL, $_pa(1f) + pushl $PVH_CS_SEL + leal rva(1f)(%ebp), %eax + pushl %eax + lretl /* 64-bit entry point. */ .code64 1: + UNWIND_HINT_END_OF_STACK + /* Set base address in stack canary descriptor. */ mov $MSR_GS_BASE,%ecx - mov $_pa(canary), %eax + leal canary(%rip), %eax xor %edx, %edx wrmsr call xen_prepare_pvh /* startup_64 expects boot_params in %rsi. */ - mov $_pa(pvh_bootparams), %rsi - mov $_pa(startup_64), %rax - ANNOTATE_RETPOLINE_SAFE - jmp *%rax + lea pvh_bootparams(%rip), %rsi + jmp startup_64 #else /* CONFIG_X86_64 */ @@ -143,7 +165,7 @@ SYM_CODE_END(pvh_start_xen) .balign 8 SYM_DATA_START_LOCAL(gdt) .word gdt_end - gdt_start - .long _pa(gdt_start) + .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */ .word 0 SYM_DATA_END(gdt) SYM_DATA_START_LOCAL(gdt_start) -- cgit From b464b461d27d564125db760938643374864c1b1f Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:28 -0400 Subject: x86/pvh: Set phys_base when calling xen_prepare_pvh() phys_base needs to be set for __pa() to work in xen_pvh_init() when finding the hypercall page. Set it before calling into xen_prepare_pvh(), which calls xen_pvh_init(). Clear it afterward to avoid __startup_64() adding to it and creating an incorrect value. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-4-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index ba4d0eab4436..14b4345d9bae 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -125,7 +125,20 @@ SYM_CODE_START_LOCAL(pvh_start_xen) xor %edx, %edx wrmsr + /* + * Calculate load offset and store in phys_base. __pa() needs + * phys_base set to calculate the hypercall page in xen_pvh_init(). + */ + movq %rbp, %rbx + subq $_pa(pvh_start_xen), %rbx + movq %rbx, phys_base(%rip) call xen_prepare_pvh + /* + * Clear phys_base. __startup_64 will *add* to its value, + * so reset to 0. + */ + xor %rbx, %rbx + movq %rbx, phys_base(%rip) /* startup_64 expects boot_params in %rsi. */ lea pvh_bootparams(%rip), %rsi -- cgit From e3e8cd90f8e2eef67ded38f9c1a5f5520a407a62 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:29 -0400 Subject: x86/kernel: Move page table macros to header The PVH entry point will need an additional set of prebuild page tables. Move the macros and defines to pgtable_64.h, so they can be re-used. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Acked-by: Dave Hansen Message-ID: <20240823193630.2583107-5-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/include/asm/pgtable_64.h | 23 ++++++++++++++++++++++- arch/x86/kernel/head_64.S | 20 -------------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3c4407271d08..72912b8edfdf 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -271,5 +271,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end) #include -#endif /* !__ASSEMBLY__ */ +#else /* __ASSEMBLY__ */ + +#define l4_index(x) (((x) >> 39) & 511) +#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + +L3_START_KERNEL = pud_index(__START_KERNEL_map) + +#define SYM_DATA_START_PAGE_ALIGNED(name) \ + SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) + +/* Automate the creation of 1 to 1 mapping pmd entries */ +#define PMDS(START, PERM, COUNT) \ + i = 0 ; \ + .rept (COUNT) ; \ + .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ + i = i + 1 ; \ + .endr + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 330922b328bf..16752b8dfa89 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -32,13 +32,6 @@ * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. */ -#define l4_index(x) (((x) >> 39) & 511) -#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) - -L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) -L4_START_KERNEL = l4_index(__START_KERNEL_map) - -L3_START_KERNEL = pud_index(__START_KERNEL_map) __HEAD .code64 @@ -577,9 +570,6 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) SYM_CODE_END(vc_no_ghcb) #endif -#define SYM_DATA_START_PAGE_ALIGNED(name) \ - SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE) - #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * Each PGD needs to be 8k long and 8k aligned. We do not @@ -601,14 +591,6 @@ SYM_CODE_END(vc_no_ghcb) #define PTI_USER_PGD_FILL 0 #endif -/* Automate the creation of 1 to 1 mapping pmd entries */ -#define PMDS(START, PERM, COUNT) \ - i = 0 ; \ - .rept (COUNT) ; \ - .quad (START) + (i << PMD_SHIFT) + (PERM) ; \ - i = i + 1 ; \ - .endr - __INITDATA .balign 4 @@ -708,8 +690,6 @@ SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt) .endr SYM_DATA_END(level1_fixmap_pgt) -#undef PMDS - .data .align 16 -- cgit From 47ffe0578aee45fed3a06d5dcff76cdebb303163 Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 23 Aug 2024 15:36:30 -0400 Subject: x86/pvh: Add 64bit relocation page tables The PVH entry point is 32bit. For a 64bit kernel, the entry point must switch to 64bit mode, which requires a set of page tables. In the past, PVH used init_top_pgt. This works fine when the kernel is loaded at LOAD_PHYSICAL_ADDR, as the page tables are prebuilt for this address. If the kernel is loaded at a different address, they need to be adjusted. __startup_64() adjusts the prebuilt page tables for the physical load address, but it is 64bit code. The 32bit PVH entry code can't call it to adjust the page tables, so it can't readily be re-used. 64bit PVH entry needs page tables set up for identity map, the kernel high map and the direct map. pvh_start_xen() enters identity mapped. Inside xen_prepare_pvh(), it jumps through a pv_ops function pointer into the highmap. The direct map is used for __va() on the initramfs and other guest physical addresses. Add a dedicated set of prebuild page tables for PVH entry. They are adjusted in assembly before loading. Add XEN_ELFNOTE_PHYS32_RELOC to indicate support for relocation along with the kernel's loading constraints. The maximum load address, KERNEL_IMAGE_SIZE - 1, is determined by a single pvh_level2_ident_pgt page. It could be larger with more pages. Signed-off-by: Jason Andryuk Reviewed-by: Juergen Gross Message-ID: <20240823193630.2583107-6-jason.andryuk@amd.com> Signed-off-by: Juergen Gross --- arch/x86/platform/pvh/head.S | 104 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S index 14b4345d9bae..64fca49cd88f 100644 --- a/arch/x86/platform/pvh/head.S +++ b/arch/x86/platform/pvh/head.S @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -102,8 +103,47 @@ SYM_CODE_START_LOCAL(pvh_start_xen) btsl $_EFER_LME, %eax wrmsr + mov %ebp, %ebx + subl $_pa(pvh_start_xen), %ebx /* offset */ + jz .Lpagetable_done + + /* Fixup page-tables for relocation. */ + leal rva(pvh_init_top_pgt)(%ebp), %edi + movl $PTRS_PER_PGD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + + /* L3 ident has a single entry. */ + leal rva(pvh_level3_ident_pgt)(%ebp), %edi + addl %ebx, 0x00(%edi) + + leal rva(pvh_level3_kernel_pgt)(%ebp), %edi + addl %ebx, (PAGE_SIZE - 16)(%edi) + addl %ebx, (PAGE_SIZE - 8)(%edi) + + /* pvh_level2_ident_pgt is fine - large pages */ + + /* pvh_level2_kernel_pgt needs adjustment - large pages */ + leal rva(pvh_level2_kernel_pgt)(%ebp), %edi + movl $PTRS_PER_PMD, %ecx +2: + testl $_PAGE_PRESENT, 0x00(%edi) + jz 1f + addl %ebx, 0x00(%edi) +1: + addl $8, %edi + decl %ecx + jnz 2b + +.Lpagetable_done: /* Enable pre-constructed page tables. */ - leal rva(init_top_pgt)(%ebp), %eax + leal rva(pvh_init_top_pgt)(%ebp), %eax mov %eax, %cr3 mov $(X86_CR0_PG | X86_CR0_PE), %eax mov %eax, %cr0 @@ -198,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack) .fill BOOT_STACK_SIZE, 1, 0 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end) +#ifdef CONFIG_X86_64 +/* + * Xen PVH needs a set of identity mapped and kernel high mapping + * page tables. pvh_start_xen starts running on the identity mapped + * page tables, but xen_prepare_pvh calls into the high mapping. + * These page tables need to be relocatable and are only used until + * startup_64 transitions to init_top_pgt. + */ +SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt) + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0 + .quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .org pvh_init_top_pgt + L4_START_KERNEL * 8, 0 + /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ + .quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC +SYM_DATA_END(pvh_init_top_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt) + .quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .fill 511, 8, 0 +SYM_DATA_END(pvh_level3_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt) + /* + * Since I easily can, map the first 1G. + * Don't set NX because code runs from these pages. + * + * Note: This sets _PAGE_GLOBAL despite whether + * the CPU supports it or it is enabled. But, + * the CPU should ignore the bit. + */ + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +SYM_DATA_END(pvh_level2_ident_pgt) +SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt) + .fill L3_START_KERNEL, 8, 0 + /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ + .quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC + .quad 0 /* no fixmap */ +SYM_DATA_END(pvh_level3_kernel_pgt) + +SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt) + /* + * Kernel high mapping. + * + * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in + * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled, + * 512 MiB otherwise. + * + * (NOTE: after that starts the module area, see MODULES_VADDR.) + * + * This table is eventually used by the kernel during normal runtime. + * Care must be taken to clear out undesired bits later, like _PAGE_RW + * or _PAGE_GLOBAL in some cases. + */ + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE) +SYM_DATA_END(pvh_level2_kernel_pgt) + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC, + .long CONFIG_PHYSICAL_ALIGN; + .long LOAD_PHYSICAL_ADDR; + .long KERNEL_IMAGE_SIZE - 1) +#endif + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) -- cgit