diff options
63 files changed, 5836 insertions, 1097 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index b07e86d4597f..7fd737eed38a 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -159,7 +159,7 @@ Description: read only Decimal value of the Per Process MMIO space length. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<afu>m/pp_mmio_off +What: /sys/class/cxl/<afu>m/pp_mmio_off (not in a guest) Date: September 2014 Contact: [email protected] Description: read only @@ -183,7 +183,7 @@ Description: read only Identifies the revision level of the PSL. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/base_image +What: /sys/class/cxl/<card>/base_image (not in a guest) Date: September 2014 Contact: [email protected] Description: read only @@ -193,7 +193,7 @@ Description: read only during the initial program load. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/image_loaded +What: /sys/class/cxl/<card>/image_loaded (not in a guest) Date: September 2014 Contact: [email protected] Description: read only @@ -201,7 +201,7 @@ Description: read only onto the card. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/load_image_on_perst +What: /sys/class/cxl/<card>/load_image_on_perst (not in a guest) Date: December 2014 Contact: [email protected] Description: read/write @@ -224,7 +224,7 @@ Description: write only to reload the FPGA depending on load_image_on_perst. Users: https://github.com/ibm-capi/libcxl -What: /sys/class/cxl/<card>/perst_reloads_same_image +What: /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest) Date: July 2015 Contact: [email protected] Description: read/write diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt index 205c1b81625c..d5506ba0fef7 100644 --- a/Documentation/powerpc/cxl.txt +++ b/Documentation/powerpc/cxl.txt @@ -116,6 +116,8 @@ Work Element Descriptor (WED) User API ======== +1. AFU character devices + For AFUs operating in AFU directed mode, two character device files will be created. /dev/cxl/afu0.0m will correspond to a master context and /dev/cxl/afu0.0s will correspond to a slave @@ -362,6 +364,59 @@ read reserved fields: For future extensions and padding + +2. Card character device (powerVM guest only) + + In a powerVM guest, an extra character device is created for the + card. The device is only used to write (flash) a new image on the + FPGA accelerator. Once the image is written and verified, the + device tree is updated and the card is reset to reload the updated + image. + +open +---- + + Opens the device and allocates a file descriptor to be used with + the rest of the API. The device can only be opened once. + +ioctl +----- + +CXL_IOCTL_DOWNLOAD_IMAGE: +CXL_IOCTL_VALIDATE_IMAGE: + Starts and controls flashing a new FPGA image. Partial + reconfiguration is not supported (yet), so the image must contain + a copy of the PSL and AFU(s). Since an image can be quite large, + the caller may have to iterate, splitting the image in smaller + chunks. + + Takes a pointer to a struct cxl_adapter_image: + struct cxl_adapter_image { + __u64 flags; + __u64 data; + __u64 len_data; + __u64 len_image; + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + __u64 reserved4; + }; + + flags: + These flags indicate which optional fields are present in + this struct. Currently all fields are mandatory. + + data: + Pointer to a buffer with part of the image to write to the + card. + + len_data: + Size of the buffer pointed to by data. + + len_image: + Full size of the image. + + Sysfs Class =========== diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d500772a1d9b..b580ce0c25a8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -94,6 +94,7 @@ config PPC select OF_RESERVED_MEM select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select SYSCTL_EXCEPTION_TRACE @@ -373,6 +374,24 @@ config PPC_TRANSACTIONAL_MEM ---help--- Support user-mode Transactional Memory on POWERPC. +config DISABLE_MPROFILE_KERNEL + bool "Disable use of mprofile-kernel for kernel tracing" + depends on PPC64 && CPU_LITTLE_ENDIAN + default y + help + Selecting this options disables use of the mprofile-kernel ABI for + kernel tracing. That will cause options such as live patching + (CONFIG_LIVEPATCH) which depend on CONFIG_DYNAMIC_FTRACE_WITH_REGS to + be disabled also. + + If you have a toolchain which supports mprofile-kernel, then you can + enable this. Otherwise leave it disabled. If you're not sure, say + "N". + +config MPROFILE_KERNEL + depends on PPC64 && CPU_LITTLE_ENDIAN + def_bool !DISABLE_MPROFILE_KERNEL + config IOMMU_HELPER def_bool PPC64 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1a0ee01b0f46..709a22a3e824 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -133,6 +133,21 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif +ifdef CONFIG_MPROFILE_KERNEL + ifeq ($(shell $(srctree)/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh $(CC) -I$(srctree)/include -D__KERNEL__),OK) + CC_FLAGS_FTRACE := -pg -mprofile-kernel + KBUILD_CPPFLAGS += -DCC_USING_MPROFILE_KERNEL + else + # If the user asked for mprofile-kernel but the toolchain doesn't + # support it, emit a warning and deliberately break the build later + # with mprofile-kernel-not-supported. We would prefer to make this an + # error right here, but then the user would never be able to run + # oldconfig to change their configuration. + $(warning Compiler does not support mprofile-kernel, set CONFIG_DISABLE_MPROFILE_KERNEL) + CC_FLAGS_FTRACE := -mprofile-kernel-not-supported + endif +endif + CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell) CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4) CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 840a5509b3f1..994c60a857ce 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -99,4 +99,25 @@ static inline unsigned long ppc_global_function_entry(void *func) #endif } +#ifdef CONFIG_PPC64 +/* + * Some instruction encodings commonly used in dynamic ftracing + * and function live patching. + */ + +/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define R2_STACK_OFFSET 24 +#else +#define R2_STACK_OFFSET 40 +#endif + +#define PPC_INST_LD_TOC (PPC_INST_LD | ___PPC_RT(__REG_R2) | \ + ___PPC_RA(__REG_R1) | R2_STACK_OFFSET) + +/* usually preceded by a mflr r0 */ +#define PPC_INST_STD_LR (PPC_INST_STD | ___PPC_RS(__REG_R0) | \ + ___PPC_RA(__REG_R1) | PPC_LR_STKOFF) +#endif /* CONFIG_PPC64 */ + #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 867c39b45df6..fb9f376ae27b 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -72,6 +72,7 @@ struct pci_dn; #define EEH_PE_PHB (1 << 1) /* PHB PE */ #define EEH_PE_DEVICE (1 << 2) /* Device PE */ #define EEH_PE_BUS (1 << 3) /* Bus PE */ +#define EEH_PE_VF (1 << 4) /* VF PE */ #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ @@ -136,11 +137,15 @@ struct eeh_dev { int pcix_cap; /* Saved PCIx capability */ int pcie_cap; /* Saved PCIe capability */ int aer_cap; /* Saved AER capability */ + int af_cap; /* Saved AF capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ + struct list_head rmv_list; /* Record the removed edevs */ struct pci_controller *phb; /* Associated PHB */ struct pci_dn *pdn; /* Associated PCI device node */ struct pci_dev *pdev; /* Associated PCI device */ + bool in_error; /* Error flag for edev */ + struct pci_dev *physfn; /* Associated SRIOV PF */ struct pci_bus *bus; /* PCI bus for partial hotplug */ }; diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ef89b1465573..50ca7585abe2 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -46,6 +46,8 @@ extern void _mcount(void); #ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_ADDR ((unsigned long)ftrace_caller) +# define FTRACE_REGS_ADDR FTRACE_ADDR static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* reloction of mcount call site is the same as the address */ @@ -58,6 +60,9 @@ struct dyn_arch_ftrace { #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif #endif #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index e3b54dd4f730..0bc9c284aa10 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -94,6 +94,7 @@ #define H_SG_LIST -72 #define H_OP_MODE -73 #define H_COP_HW -74 +#define H_STATE -75 #define H_UNSUPPORTED_FLAG_START -256 #define H_UNSUPPORTED_FLAG_END -511 #define H_MULTI_THREADS_ACTIVE -9005 diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 5f1526fddccf..cd4ffd86765f 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -78,10 +78,18 @@ struct mod_arch_specific { # endif /* MODULE */ #endif -bool is_module_trampoline(u32 *insns); -int module_trampoline_target(struct module *mod, u32 *trampoline, +int module_trampoline_target(struct module *mod, unsigned long trampoline, unsigned long *target); +#ifdef CONFIG_DYNAMIC_FTRACE +int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs); +#else +static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) +{ + return 0; +} +#endif + struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, struct exception_table_entry *finish); diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b0b43f5fbc5f..9f165e8a77bf 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -212,6 +212,7 @@ struct pci_dn { #define IODA_INVALID_PE (-1) #ifdef CONFIG_PPC_POWERNV int pe_number; + int vf_index; /* VF index in the PF */ #ifdef CONFIG_PCI_IOV u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ @@ -220,6 +221,7 @@ struct pci_dn { #define IODA_INVALID_M64 (-1) int (*m64_map)[PCI_SRIOV_NUM_BARS]; #endif /* CONFIG_PCI_IOV */ + int mps; /* Maximum Payload Size */ #endif struct list_head child_list; struct list_head list; diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 814622146d5a..e157489ee7a1 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -136,16 +136,24 @@ extern ssize_t power_events_sysfs_show(struct device *dev, * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and * 'PM_CYC' where the latter is the name by which the event is known in * POWER CPU specification. + * + * Similarly, some hardware and cache events use the same event code. Eg. + * on POWER8, both "cache-references" and "L1-dcache-loads" events refer + * to the same event, PM_LD_REF_L1. The suffix, allows us to have two + * sysfs objects for the same event and thus two entries/aliases in sysfs. */ #define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix #define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr #define EVENT_ATTR(_name, _id, _suffix) \ - PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), _id, \ power_events_sysfs_show) #define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g) #define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g) +#define CACHE_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _c) +#define CACHE_EVENT_PTR(_id) EVENT_PTR(_id, _c) + #define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p) #define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p) diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index a5e930aca804..abf5866e08c6 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -22,6 +22,18 @@ static inline int in_kernel_text(unsigned long addr) return 0; } +static inline unsigned long kernel_toc_addr(void) +{ + /* Defined by the linker, see vmlinux.lds.S */ + extern unsigned long __toc_start; + + /* + * The TOC register (r2) points 32kB into the TOC, so that 64kB of + * the TOC can be addressed using a single machine instruction. + */ + return (unsigned long)(&__toc_start) + 0x8000UL; +} + static inline int overlaps_interrupt_vector_text(unsigned long start, unsigned long end) { diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 794f22adf99d..2da380fcc34c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -16,14 +16,14 @@ endif ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog -CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # do not trace tracer code -CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) # timers used by tracing -CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) endif obj-y := cputable.o ptrace.o syscalls.o \ diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 8c6005cf1583..6544017eb90b 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -268,13 +268,6 @@ static void *eeh_dump_pe_log(void *data, void *flag) struct eeh_dev *edev, *tmp; size_t *plen = flag; - /* If the PE's config space is blocked, 0xFF's will be - * returned. It's pointless to collect the log in this - * case. - */ - if (pe->state & EEH_PE_CFG_BLOCKED) - return NULL; - eeh_pe_for_each_dev(pe, edev, tmp) *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, EEH_PCI_REGS_LOG_LEN - *plen); @@ -677,7 +670,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) /* Check if the request is finished successfully */ if (active_flag) { rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc <= 0) + if (rc < 0) return rc; if (rc & active_flag) @@ -761,7 +754,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat case pcie_deassert_reset: eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); eeh_unfreeze_pe(pe, false); - eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); eeh_pe_state_clear(pe, EEH_PE_ISOLATED); break; @@ -769,14 +763,16 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); - eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); + if (!(pe->type & EEH_PE_VF)) + eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: @@ -1243,6 +1239,14 @@ void eeh_remove_device(struct pci_dev *dev) * from the parent PE during the BAR resotre. */ edev->pdev = NULL; + + /* + * The flag "in_error" is used to trace EEH devices for VFs + * in error state or not. It's set in eeh_report_error(). If + * it's not set, eeh_report_{reset,resume}() won't be called + * for the VF EEH device. + */ + edev->in_error = false; dev->dev.archdata.edev = NULL; if (!(edev->pe->state & EEH_PE_KEEP)) eeh_rmv_from_parent_pe(edev); @@ -1537,6 +1541,17 @@ int eeh_pe_get_state(struct eeh_pe *pe) if (!eeh_ops || !eeh_ops->get_state) return -ENOENT; + /* + * If the parent PE is owned by the host kernel and is undergoing + * error recovery, we should return the PE state as temporarily + * unavailable so that the error recovery on the guest is suspended + * until the recovery completes on the host. + */ + if (pe->parent && + !(pe->state & EEH_PE_REMOVED) && + (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + return EEH_PE_STATE_UNAVAIL; + result = eeh_ops->get_state(pe, NULL); rst_active = !!(result & EEH_STATE_RESET_ACTIVE); dma_en = !!(result & EEH_STATE_DMA_ENABLED); diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index a1e86e172e3c..ddbcfab7efdf 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -195,8 +195,11 @@ static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) return; } - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + /* + * Walk resources on this device, poke the first 7 (6 normal BAR and 1 + * ROM BAR) into the tree. + */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { resource_size_t start = pci_resource_start(dev,i); resource_size_t end = pci_resource_end(dev,i); unsigned long flags = pci_resource_flags(dev,i); @@ -222,10 +225,6 @@ void eeh_addr_cache_insert_dev(struct pci_dev *dev) { unsigned long flags; - /* Ignore PCI bridges */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) - return; - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); __eeh_addr_cache_insert_dev(dev); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index aabba94ff9cb..7815095fe3d8 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) edev->pdn = pdn; edev->phb = phb; INIT_LIST_HEAD(&edev->list); + INIT_LIST_HEAD(&edev->rmv_list); return NULL; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 650cfb31ea3d..fb6207d2c604 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -34,6 +34,11 @@ #include <asm/prom.h> #include <asm/rtas.h> +struct eeh_rmv_data { + struct list_head edev_list; + int removed; +}; + /** * eeh_pcid_name - Retrieve name of PCI device driver * @pdev: PCI device @@ -190,7 +195,7 @@ static void *eeh_report_error(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_frozen; @@ -211,6 +216,7 @@ static void *eeh_report_error(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + edev->in_error = true; eeh_pcid_put(dev); return NULL; } @@ -231,7 +237,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; driver = eeh_pcid_get(dev); @@ -271,7 +277,7 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; @@ -282,7 +288,8 @@ static void *eeh_report_reset(void *data, void *userdata) if (!driver->err_handler || !driver->err_handler->slot_reset || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || + (!edev->in_error)) { eeh_pcid_put(dev); return NULL; } @@ -326,20 +333,23 @@ static void *eeh_report_resume(void *data, void *userdata) { struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + bool was_in_error; struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_normal; driver = eeh_pcid_get(dev); if (!driver) return NULL; + was_in_error = edev->in_error; + edev->in_error = false; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume || - (edev->mode & EEH_DEV_NO_HANDLER)) { + (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) { edev->mode &= ~EEH_DEV_NO_HANDLER; eeh_pcid_put(dev); return NULL; @@ -365,7 +375,7 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_driver *driver; - if (!dev || eeh_dev_removed(edev)) + if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe)) return NULL; dev->error_state = pci_channel_io_perm_failure; @@ -386,12 +396,40 @@ static void *eeh_report_failure(void *data, void *userdata) return NULL; } +static void *eeh_add_virt_device(void *data, void *userdata) +{ + struct pci_driver *driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + if (!(edev->physfn)) { + pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n", + __func__, edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); + return NULL; + } + + driver = eeh_pcid_get(dev); + if (driver) { + eeh_pcid_put(dev); + if (driver->err_handler) + return NULL; + } + +#ifdef CONFIG_PPC_POWERNV + pci_iov_add_virtfn(edev->physfn, pdn->vf_index, 0); +#endif + return NULL; +} + static void *eeh_rmv_device(void *data, void *userdata) { struct pci_driver *driver; struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - int *removed = (int *)userdata; + struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata; + int *removed = rmv_data ? &rmv_data->removed : NULL; /* * Actually, we should remove the PCI bridges as well. @@ -416,7 +454,11 @@ static void *eeh_rmv_device(void *data, void *userdata) driver = eeh_pcid_get(dev); if (driver) { eeh_pcid_put(dev); - if (driver->err_handler && + if (removed && + eeh_pe_passed(edev->pe)) + return NULL; + if (removed && + driver->err_handler && driver->err_handler->error_detected && driver->err_handler->slot_reset) return NULL; @@ -427,11 +469,29 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_name(dev)); edev->bus = dev->bus; edev->mode |= EEH_DEV_DISCONNECTED; - (*removed)++; + if (removed) + (*removed)++; - pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(dev); - pci_unlock_rescan_remove(); + if (edev->physfn) { +#ifdef CONFIG_PPC_POWERNV + struct pci_dn *pdn = eeh_dev_to_pdn(edev); + + pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); + edev->pdev = NULL; + + /* + * We have to set the VF PE number to invalid one, which is + * required to plug the VF successfully. + */ + pdn->pe_number = IODA_INVALID_PE; +#endif + if (rmv_data) + list_add(&edev->rmv_list, &rmv_data->edev_list); + } else { + pci_lock_rescan_remove(); + pci_stop_and_remove_bus_device(dev); + pci_unlock_rescan_remove(); + } return NULL; } @@ -545,11 +605,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, + struct eeh_rmv_data *rmv_data) { struct pci_bus *frozen_bus = eeh_pe_bus_get(pe); struct timeval tstamp; - int cnt, rc, removed = 0; + int cnt, rc; + struct eeh_dev *edev; /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; @@ -563,12 +625,16 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) */ eeh_pe_state_mark(pe, EEH_PE_KEEP); if (bus) { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(bus); - pci_unlock_rescan_remove(); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(bus); + pci_unlock_rescan_remove(); + } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); + eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); } /* @@ -610,14 +676,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) * PE. We should disconnect it so the binding can be * rebuilt when adding PCI devices. */ + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(bus); - } else if (frozen_bus && removed) { + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(bus); + } else if (frozen_bus && rmv_data->removed) { pr_info("EEH: Sleep 5s ahead of partial hotplug\n"); ssleep(5); + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL); - pcibios_add_pci_devices(frozen_bus); + if (pe->type & EEH_PE_VF) + eeh_add_virt_device(edev, NULL); + else + pcibios_add_pci_devices(frozen_bus); } eeh_pe_state_clear(pe, EEH_PE_KEEP); @@ -636,8 +710,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) static void eeh_handle_normal_event(struct eeh_pe *pe) { struct pci_bus *frozen_bus; + struct eeh_dev *edev, *tmp; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; + struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0}; frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { @@ -692,7 +768,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) */ if (result == PCI_ERS_RESULT_NONE) { pr_info("EEH: Reset with hotplug activity\n"); - rc = eeh_reset_device(pe, frozen_bus); + rc = eeh_reset_device(pe, frozen_bus, NULL); if (rc) { pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); @@ -744,7 +820,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { pr_info("EEH: Reset without hotplug activity\n"); - rc = eeh_reset_device(pe, NULL); + rc = eeh_reset_device(pe, NULL, &rmv_data); if (rc) { pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); @@ -764,6 +840,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) goto hard_fail; } + /* + * For those hot removed VFs, we should add back them after PF get + * recovered properly. + */ + list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) { + eeh_add_virt_device(edev, NULL); + list_del(&edev->rmv_list); + } + /* Tell all device drivers that they can resume operations */ pr_info("EEH: Notify device driver to resume\n"); eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); @@ -803,12 +888,17 @@ perm_error: * the their PCI config any more. */ if (frozen_bus) { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pcibios_remove_pci_devices(frozen_bus); - pci_unlock_rescan_remove(); + pci_lock_rescan_remove(); + pcibios_remove_pci_devices(frozen_bus); + pci_unlock_rescan_remove(); + } } } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 98f81800e00c..eea48d8baf49 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -299,7 +299,10 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) * EEH device already having associated PE, but * the direct parent EEH device doesn't have yet. */ - pdn = pdn ? pdn->parent : NULL; + if (edev->physfn) + pdn = pci_get_pdn(edev->physfn); + else + pdn = pdn ? pdn->parent : NULL; while (pdn) { /* We're poking out of PCI territory */ parent = pdn_to_eeh_dev(pdn); @@ -382,7 +385,10 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) } /* Create a new EEH PE */ - pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + if (edev->physfn) + pe = eeh_pe_alloc(edev->phb, EEH_PE_VF); + else + pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); if (!pe) { pr_err("%s: out of memory!\n", __func__); return -ENOMEM; @@ -920,25 +926,21 @@ const char *eeh_pe_loc_get(struct eeh_pe *pe) */ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { - struct pci_bus *bus = NULL; struct eeh_dev *edev; struct pci_dev *pdev; - if (pe->type & EEH_PE_PHB) { - bus = pe->phb->bus; - } else if (pe->type & EEH_PE_BUS || - pe->type & EEH_PE_DEVICE) { - if (pe->state & EEH_PE_PRI_BUS) { - bus = pe->bus; - goto out; - } + if (pe->type & EEH_PE_PHB) + return pe->phb->bus; - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); - pdev = eeh_dev_to_pci_dev(edev); - if (pdev) - bus = pdev->bus; - } + /* The primary bus might be cached during probe time */ + if (pe->state & EEH_PE_PRI_BUS) + return pe->bus; -out: - return bus; + /* Retrieve the parent PCI bus of first (top) PCI device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + return pdev->bus; + + return NULL; } diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 038e0a1425e7..b9b125327f27 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1158,8 +1158,12 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - blr + mflr r12 + mtctr r12 + mtlr r0 + bctr +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL_TOC(ftrace_caller) /* Taken from output of objdump from lib64/glibc */ mflr r3 @@ -1181,6 +1185,115 @@ _GLOBAL(ftrace_graph_stub) ld r0, 128(r1) mtlr r0 addi r1, r1, 112 + +#else /* CC_USING_MPROFILE_KERNEL */ +/* + * + * ftrace_caller() is the function that replaces _mcount() when ftrace is + * active. + * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. + * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +_GLOBAL(ftrace_caller) + /* Save the original return address in A's stack frame */ + std r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stdu r1,-SWITCH_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + SAVE_8GPRS(0,r1) + SAVE_8GPRS(8,r1) + SAVE_8GPRS(16,r1) + SAVE_8GPRS(24,r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip & pt_regs->link */ + std r7, _NIP(r1) + std r7, _LINK(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2,PACATOC(r13) /* get kernel TOC in r2 */ + + addis r3,r2,function_trace_op@toc@ha + addi r3,r3,function_trace_op@toc@l + ld r5,0(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + std r8, _MSR(r1) + std r9, _CTR(r1) + std r10, _XER(r1) + std r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1 ,STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + ld r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + REST_8GPRS(0,r1) + REST_8GPRS(8,r1) + REST_8GPRS(16,r1) + REST_8GPRS(24,r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE + + /* Restore original LR for return to B */ + ld r0, LRSAVE(r1) + mtlr r0 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + stdu r1, -112(r1) +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) + addi r1, r1, 112 +#endif + + ld r0,LRSAVE(r1) /* restore callee's lr at _mcount site */ + mtlr r0 + bctr /* jump after _mcount site */ +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(ftrace_stub) blr #else @@ -1213,6 +1326,7 @@ _GLOBAL(ftrace_stub) #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifndef CC_USING_MPROFILE_KERNEL _GLOBAL(ftrace_graph_caller) /* load r4 with local address */ ld r4, 128(r1) @@ -1237,6 +1351,56 @@ _GLOBAL(ftrace_graph_caller) addi r1, r1, 112 blr +#else /* CC_USING_MPROFILE_KERNEL */ +_GLOBAL(ftrace_graph_caller) + /* with -mprofile-kernel, parameter regs are still alive at _mcount */ + std r10, 104(r1) + std r9, 96(r1) + std r8, 88(r1) + std r7, 80(r1) + std r6, 72(r1) + std r5, 64(r1) + std r4, 56(r1) + std r3, 48(r1) + + /* Save callee's TOC in the ABI compliant location */ + std r2, 24(r1) + ld r2, PACATOC(r13) /* get kernel TOC in r2 */ + + mfctr r4 /* ftrace_caller has moved local addr here */ + std r4, 40(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ + subi r4, r4, MCOUNT_INSN_SIZE + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR to this. + */ + mtlr r3 + + ld r0, 40(r1) + mtctr r0 + ld r10, 104(r1) + ld r9, 96(r1) + ld r8, 88(r1) + ld r7, 80(r1) + ld r6, 72(r1) + ld r5, 64(r1) + ld r4, 56(r1) + ld r3, 48(r1) + + /* Restore callee's TOC */ + ld r2, 24(r1) + + addi r1, r1, 112 + mflr r0 + std r0, LRSAVE(r1) + bctr +#endif /* CC_USING_MPROFILE_KERNEL */ + _GLOBAL(return_to_handler) /* need to save return values */ std r4, -32(r1) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 44d4d8eb3c85..9dac18dabd03 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -61,8 +61,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) return -EFAULT; /* Make sure it is what we expect it to be */ - if (replaced != old) + if (replaced != old) { + pr_err("%p: replaced (%#x) != old (%#x)", + (void *)ip, replaced, old); return -EINVAL; + } /* replace the text with the new text */ if (patch_instruction((unsigned int *)ip, new)) @@ -106,14 +109,15 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned int op; - unsigned long entry, ptr; + unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - void *tramp; + unsigned int op, pop; /* read where this goes */ - if (probe_kernel_read(&op, (void *)ip, sizeof(int))) + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { + pr_err("Fetching opcode failed.\n"); return -EFAULT; + } /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { @@ -122,14 +126,9 @@ __ftrace_make_nop(struct module *mod, } /* lets find where the pointer goes */ - tramp = (void *)find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %p", ip, tramp); + tramp = find_bl_target(ip, op); - if (!is_module_trampoline(tramp)) { - pr_err("Not a trampoline\n"); - return -EINVAL; - } + pr_devel("ip:%lx jumps to %lx", ip, tramp); if (module_trampoline_target(mod, tramp, &ptr)) { pr_err("Failed to get trampoline target\n"); @@ -158,10 +157,42 @@ __ftrace_make_nop(struct module *mod, * * Use a b +8 to jump over the load. */ - op = 0x48000008; /* b +8 */ - if (patch_instruction((unsigned int *)ip, op)) + pop = PPC_INST_BRANCH | 8; /* b +8 */ + + /* + * Check what is in the next instruction. We can see ld r2,40(r1), but + * on first pass after boot we will see mflr r0. + */ + if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) { + pr_err("Fetching op failed.\n"); + return -EFAULT; + } + + if (op != PPC_INST_LD_TOC) { + unsigned int inst; + + if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ + if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { + pr_err("Unexpected instructions around bl _mcount\n" + "when enabling dynamic ftrace!\t" + "(%08x,bl,%08x)\n", inst, op); + return -EINVAL; + } + + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + } + + if (patch_instruction((unsigned int *)ip, pop)) { + pr_err("Patching NOP failed.\n"); return -EPERM; + } return 0; } @@ -287,16 +318,15 @@ int ftrace_make_nop(struct module *mod, #ifdef CONFIG_MODULES #ifdef CONFIG_PPC64 +/* + * Examine the existing instructions for __ftrace_make_call. + * They should effectively be a NOP, and follow formal constraints, + * depending on the ABI. Return false if they don't. + */ +#ifndef CC_USING_MPROFILE_KERNEL static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) { - unsigned int op[2]; - void *ip = (void *)rec->ip; - - /* read where this goes */ - if (probe_kernel_read(op, ip, sizeof(op))) - return -EFAULT; - /* * We expect to see: * @@ -306,8 +336,34 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. */ - if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { - pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); + if ((op0 != 0x48000008) || ((op1 & 0xffff0000) != 0xe8410000)) + return 0; + return 1; +} +#else +static int +expected_nop_sequence(void *ip, unsigned int op0, unsigned int op1) +{ + /* look for patched "NOP" on ppc64 with -mprofile-kernel */ + if (op0 != PPC_INST_NOP) + return 0; + return 1; +} +#endif + +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op[2]; + void *ip = (void *)rec->ip; + + /* read where this goes */ + if (probe_kernel_read(op, ip, sizeof(op))) + return -EFAULT; + + if (!expected_nop_sequence(ip, op[0], op[1])) { + pr_err("Unexpected call sequence at %p: %x %x\n", + ip, op[0], op[1]); return -EINVAL; } @@ -330,7 +386,16 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return 0; } -#else + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return ftrace_make_call(rec, addr); +} +#endif + +#else /* !CONFIG_PPC64: */ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { @@ -455,20 +520,13 @@ void ftrace_replace_code(int enable) } } +/* + * Use the default ftrace_modify_all_code, but without + * stop_machine(). + */ void arch_ftrace_update_code(int command) { - if (command & FTRACE_UPDATE_CALLS) - ftrace_replace_code(1); - else if (command & FTRACE_DISABLE_CALLS) - ftrace_replace_code(0); - - if (command & FTRACE_UPDATE_TRACE_FUNC) - ftrace_update_ftrace_func(ftrace_trace_function); - - if (command & FTRACE_START_FUNC_RET) - ftrace_enable_ftrace_graph_caller(); - else if (command & FTRACE_STOP_FUNC_RET) - ftrace_disable_ftrace_graph_caller(); + ftrace_modify_all_code(command); } int __init ftrace_dyn_arch_init(void) diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 9547381b631a..d1f1b35bf0c7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -47,6 +47,11 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; + int rc; + + rc = module_finalize_ftrace(me, sechdrs); + if (rc) + return rc; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2c01665eb410..5a7a78f12562 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -181,7 +181,7 @@ static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) /* Set up a trampoline in the PLT to bounce us to the distant function */ static uint32_t do_plt_call(void *location, Elf32_Addr val, - Elf32_Shdr *sechdrs, + const Elf32_Shdr *sechdrs, struct module *mod) { struct ppc_plt_entry *entry; @@ -294,11 +294,19 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } } + + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE - module->arch.tramp = - do_plt_call(module->core_layout.base, - (unsigned long)ftrace_caller, - sechdrs, module); -#endif +int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) +{ + module->arch.tramp = do_plt_call(module->core_layout.base, + (unsigned long)ftrace_caller, + sechdrs, module); + if (!module->arch.tramp) + return -ENOENT; + return 0; } +#endif diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 08b7a40de5f8..9ce9a25f58b5 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -31,6 +31,7 @@ #include <asm/code-patching.h> #include <linux/sort.h> #include <asm/setup.h> +#include <asm/sections.h> /* FIXME: We don't do .init separately. To do this, we'd need to have a separate r2 value in the init and core section, and stub between @@ -41,7 +42,6 @@ --RR. */ #if defined(_CALL_ELF) && _CALL_ELF == 2 -#define R2_STACK_OFFSET 24 /* An address is simply the address of the function. */ typedef unsigned long func_desc_t; @@ -73,7 +73,6 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); } #else -#define R2_STACK_OFFSET 40 /* An address is address of the OPD entry, which contains address of fn. */ typedef struct ppc64_opd_entry func_desc_t; @@ -96,6 +95,8 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym) } #endif +#define STUB_MAGIC 0x73747562 /* stub */ + /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ @@ -105,7 +106,8 @@ struct ppc64_stub_entry * need 6 instructions on ABIv2 but we always allocate 7 so * so we don't have to modify the trampoline load instruction. */ u32 jump[7]; - u32 unused; + /* Used by ftrace to identify stubs */ + u32 magic; /* Data for the above code */ func_desc_t funcdata; }; @@ -139,70 +141,39 @@ static u32 ppc64_stub_insns[] = { }; #ifdef CONFIG_DYNAMIC_FTRACE - -static u32 ppc64_stub_mask[] = { - 0xffff0000, - 0xffff0000, - 0xffffffff, - 0xffffffff, -#if !defined(_CALL_ELF) || _CALL_ELF != 2 - 0xffffffff, -#endif - 0xffffffff, - 0xffffffff -}; - -bool is_module_trampoline(u32 *p) +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) { - unsigned int i; - u32 insns[ARRAY_SIZE(ppc64_stub_insns)]; - - BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask)); + struct ppc64_stub_entry *stub; + func_desc_t funcdata; + u32 magic; - if (probe_kernel_read(insns, p, sizeof(insns))) + if (!within_module_core(addr, mod)) { + pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name); return -EFAULT; - - for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { - u32 insna = insns[i]; - u32 insnb = ppc64_stub_insns[i]; - u32 mask = ppc64_stub_mask[i]; - - if ((insna & mask) != (insnb & mask)) - return false; } - return true; -} - -int module_trampoline_target(struct module *mod, u32 *trampoline, - unsigned long *target) -{ - u32 buf[2]; - u16 upper, lower; - long offset; - void *toc_entry; + stub = (struct ppc64_stub_entry *)addr; - if (probe_kernel_read(buf, trampoline, sizeof(buf))) + if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; + } - upper = buf[0] & 0xffff; - lower = buf[1] & 0xffff; - - /* perform the addis/addi, both signed */ - offset = ((short)upper << 16) + (short)lower; + if (magic != STUB_MAGIC) { + pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - /* - * Now get the address this trampoline jumps to. This - * is always 32 bytes into our trampoline stub. - */ - toc_entry = (void *)mod->arch.toc + offset + 32; + if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); + return -EFAULT; + } - if (probe_kernel_read(target, toc_entry, sizeof(*target))) - return -EFAULT; + *target = stub_func_addr(funcdata); return 0; } - #endif /* Count how many different 24-bit relocations (different symbol, @@ -413,7 +384,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, /* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the value maximum span in an instruction which uses a signed offset) */ -static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) +static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { return sechdrs[me->arch.toc_section].sh_addr + 0x8000; } @@ -426,7 +397,7 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me) #define PPC_HA(v) PPC_HI ((v) + 0x8000) /* Patch stub to reference function and correct r2 value. */ -static inline int create_stub(Elf64_Shdr *sechdrs, +static inline int create_stub(const Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, unsigned long addr, struct module *me) @@ -447,12 +418,14 @@ static inline int create_stub(Elf64_Shdr *sechdrs, entry->jump[0] |= PPC_HA(reladdr); entry->jump[1] |= PPC_LO(reladdr); entry->funcdata = func_desc(addr); + entry->magic = STUB_MAGIC; + return 1; } /* Create stub to jump to function described in this OPD/ptr: we need the stub to set up the TOC ptr (r2) for the function. */ -static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, +static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) { @@ -476,17 +449,60 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } +#ifdef CC_USING_MPROFILE_KERNEL +static bool is_early_mcount_callsite(u32 *instruction) +{ + /* + * Check if this is one of the -mprofile-kernel sequences. + */ + if (instruction[-1] == PPC_INST_STD_LR && + instruction[-2] == PPC_INST_MFLR) + return true; + + if (instruction[-1] == PPC_INST_MFLR) + return true; + + return false; +} + +/* + * In case of _mcount calls, do not save the current callee's TOC (in r2) into + * the original caller's stack frame. If we did we would clobber the saved TOC + * value of the original caller. + */ +static void squash_toc_save_inst(const char *name, unsigned long addr) +{ + struct ppc64_stub_entry *stub = (struct ppc64_stub_entry *)addr; + + /* Only for calls to _mcount */ + if (strcmp("_mcount", name) != 0) + return; + + stub->jump[2] = PPC_INST_NOP; +} +#else +static void squash_toc_save_inst(const char *name, unsigned long addr) { } + +/* without -mprofile-kernel, mcount calls are never early */ +static bool is_early_mcount_callsite(u32 *instruction) +{ + return false; +} +#endif + /* We expect a noop next: if it is, replace it with instruction to restore r2. */ static int restore_r2(u32 *instruction, struct module *me) { if (*instruction != PPC_INST_NOP) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; } /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = 0xe8410000 | R2_STACK_OFFSET; + *instruction = PPC_INST_LD_TOC; return 1; } @@ -611,6 +627,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOENT; if (!restore_r2((u32 *)location + 1, me)) return -ENOEXEC; + + squash_toc_save_inst(strtab + sym->st_name, value); } else value += local_entry_offset(sym); @@ -693,12 +711,84 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } } + return 0; +} + #ifdef CONFIG_DYNAMIC_FTRACE - me->arch.toc = my_r2(sechdrs, me); - me->arch.tramp = stub_for_addr(sechdrs, - (unsigned long)ftrace_caller, - me); + +#ifdef CC_USING_MPROFILE_KERNEL + +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +/* + * For mprofile-kernel we use a special stub for ftrace_caller() because we + * can't rely on r2 containing this module's TOC when we enter the stub. + * + * That can happen if the function calling us didn't need to use the toc. In + * that case it won't have setup r2, and the r2 value will be either the + * kernel's toc, or possibly another modules toc. + * + * To deal with that this stub uses the kernel toc, which is always accessible + * via the paca (in r13). The target (ftrace_caller()) is responsible for + * saving and restoring the toc before returning. + */ +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +{ + struct ppc64_stub_entry *entry; + unsigned int i, num_stubs; + static u32 stub_insns[] = { + 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */ + 0x3d8c0000, /* addis r12,r12,<high> */ + 0x398c0000, /* addi r12,r12,<low> */ + 0x7d8903a6, /* mtctr r12 */ + 0x4e800420, /* bctr */ + }; + long reladdr; + + num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*entry); + + /* Find the next available stub entry */ + entry = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; i < num_stubs && stub_func_addr(entry->funcdata); i++, entry++); + + if (i >= num_stubs) { + pr_err("%s: Unable to find a free slot for ftrace stub.\n", me->name); + return 0; + } + + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = (unsigned long)ftrace_caller - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of ftrace_caller out of range of kernel_toc.\n", me->name); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + + /* Eventhough we don't use funcdata in the stub, it's needed elsewhere. */ + entry->funcdata = func_desc((unsigned long)ftrace_caller); + entry->magic = STUB_MAGIC; + + return (unsigned long)entry; +} +#else +static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, struct module *me) +{ + return stub_for_addr(sechdrs, (unsigned long)ftrace_caller, me); +} #endif +int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) +{ + mod->arch.toc = my_r2(sechdrs, mod); + mod->arch.tramp = create_ftrace_stub(sechdrs, mod); + + if (!mod->arch.tramp) + return -ENOENT; + return 0; } +#endif diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 01ea0edf0579..93dae296b6be 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -17,10 +17,6 @@ #include <asm/pgtable.h> #include <asm/kexec.h> -/* This symbol is provided by the linker - let it fill in the paca - * field correctly */ -extern unsigned long __toc_start; - #ifdef CONFIG_PPC_BOOK3S /* @@ -149,11 +145,6 @@ EXPORT_SYMBOL(paca); void __init initialise_paca(struct paca_struct *new_paca, int cpu) { - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ - unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - #ifdef CONFIG_PPC_BOOK3S new_paca->lppaca_ptr = new_lppaca(cpu); #else @@ -161,7 +152,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; + new_paca->kernel_toc = kernel_toc_addr(); new_paca->kernelbase = (unsigned long) _stext; /* Only set MSR:IR/DR when MMU is initialized */ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 7f9ed0c1f6b9..59c436189f46 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -55,7 +55,7 @@ void pcibios_remove_pci_devices(struct pci_bus *bus) pr_debug("PCI: Removing devices on bus %04x:%02x\n", pci_domain_nr(bus), bus->number); - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) { pr_debug(" Removing %s...\n", pci_name(dev)); pci_stop_and_remove_bus_device(dev); } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index b3b4df91b792..38102cb9baa9 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, struct pci_dev *pdev, + int vf_index, int busno, int devfn) { struct pci_dn *pdn; @@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent, pdn->busno = busno; pdn->devfn = devfn; #ifdef CONFIG_PPC_POWERNV + pdn->vf_index = vf_index; pdn->pe_number = IODA_INVALID_PE; #endif INIT_LIST_HEAD(&pdn->child_list); @@ -179,6 +181,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; + struct eeh_dev *edev; int i; /* Only support IOV for now */ @@ -196,7 +199,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { - pdn = add_one_dev_pci_data(parent, NULL, + pdn = add_one_dev_pci_data(parent, NULL, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); if (!pdn) { @@ -204,6 +207,12 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) __func__, i); return NULL; } + + /* Create the EEH device for the VF */ + eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); + edev = pdn_to_eeh_dev(pdn); + BUG_ON(!edev); + edev->physfn = pdev; } #endif /* CONFIG_PCI_IOV */ @@ -215,6 +224,7 @@ void remove_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; + struct eeh_dev *edev; int i; /* @@ -256,6 +266,13 @@ void remove_dev_pci_data(struct pci_dev *pdev) pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) continue; + /* Release EEH device for the VF */ + edev = pdn_to_eeh_dev(pdn); + if (edev) { + pdn->edev = NULL; + kfree(edev); + } + if (!list_empty(&pdn->list)) list_del(&pdn->list); diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index e46b06822bea..ba21be15310f 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -6,8 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -CFLAGS_REMOVE_code-patching.o = -pg -CFLAGS_REMOVE_feature-fixups.o = -pg +CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \ feature-fixups.o diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index d1e65ce545b3..97a1d40d8696 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -651,7 +651,7 @@ static void pmao_restore_workaround(bool ebb) /* * We are already soft-disabled in power_pmu_enable(). We need to hard - * enable to actually prevent the PMU exception from firing. + * disable to actually prevent the PMU exception from firing. */ hard_irq_disable(); diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 9f9dfda9ed2c..59012e750abe 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -27,20 +27,6 @@ #include "hv-24x7-catalog.h" #include "hv-common.h" -static const char *event_domain_suffix(unsigned domain) -{ - switch (domain) { -#define DOMAIN(n, v, x, c) \ - case HV_PERF_DOMAIN_##n: \ - return "__" #n; -#include "hv-24x7-domains.h" -#undef DOMAIN - default: - WARN(1, "unknown domain %d\n", domain); - return "__UNKNOWN_DOMAIN_SUFFIX"; - } -} - static bool domain_is_valid(unsigned domain) { switch (domain) { @@ -68,6 +54,24 @@ static bool is_physical_domain(unsigned domain) } } +static const char *domain_name(unsigned domain) +{ + if (!domain_is_valid(domain)) + return NULL; + + switch (domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip"; + case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core"; + case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core"; + case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip"; + case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node"; + case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node"; + } + + WARN_ON_ONCE(domain); + return NULL; +} + static bool catalog_entry_domain_is_valid(unsigned domain) { return is_physical_domain(domain); @@ -101,6 +105,7 @@ static bool catalog_entry_domain_is_valid(unsigned domain) EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3); /* u16 */ EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31); +EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31); EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31); /* u32, see "data_offset" */ EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63); @@ -115,6 +120,7 @@ static struct attribute *format_attrs[] = { &format_attr_domain.attr, &format_attr_offset.attr, &format_attr_core.attr, + &format_attr_chip.attr, &format_attr_vcpu.attr, &format_attr_lpar.attr, NULL, @@ -274,32 +280,70 @@ static unsigned long h_get_24x7_catalog_page(char page[], version, index); } -static unsigned core_domains[] = { - HV_PERF_DOMAIN_PHYS_CORE, - HV_PERF_DOMAIN_VCPU_HOME_CORE, - HV_PERF_DOMAIN_VCPU_HOME_CHIP, - HV_PERF_DOMAIN_VCPU_HOME_NODE, - HV_PERF_DOMAIN_VCPU_REMOTE_NODE, -}; -/* chip event data always yeilds a single event, core yeilds multiple */ -#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains) - +/* + * Each event we find in the catalog, will have a sysfs entry. Format the + * data for this sysfs entry based on the event's domain. + * + * Events belonging to the Chip domain can only be monitored in that domain. + * i.e the domain for these events is a fixed/knwon value. + * + * Events belonging to the Core domain can be monitored either in the physical + * core or in one of the virtual CPU domains. So the domain value for these + * events must be specified by the user (i.e is a required parameter). Format + * the Core events with 'domain=?' so the perf-tool can error check required + * parameters. + * + * NOTE: For the Core domain events, rather than making domain a required + * parameter we could default it to PHYS_CORE and allowe users to + * override the domain to one of the VCPU domains. + * + * However, this can make the interface a little inconsistent. + * + * If we set domain=2 (PHYS_CHIP) and allow user to override this field + * the user may be tempted to also modify the "offset=x" field in which + * can lead to confusing usage. Consider the HPM_PCYC (offset=0x18) and + * HPM_INST (offset=0x20) events. With: + * + * perf stat -e hv_24x7/HPM_PCYC,offset=0x20/ + * + * we end up monitoring HPM_INST, while the command line has HPM_PCYC. + * + * By not assigning a default value to the domain for the Core events, + * we can have simple guidelines: + * + * - Specifying values for parameters with "=?" is required. + * + * - Specifying (i.e overriding) values for other parameters + * is undefined. + */ static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain) { const char *sindex; const char *lpar; + const char *domain_str; + char buf[8]; - if (is_physical_domain(domain)) { + switch (domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: + snprintf(buf, sizeof(buf), "%d", domain); + domain_str = buf; + lpar = "0x0"; + sindex = "chip"; + break; + case HV_PERF_DOMAIN_PHYS_CORE: + domain_str = "?"; lpar = "0x0"; sindex = "core"; - } else { + break; + default: + domain_str = "?"; lpar = "?"; sindex = "vcpu"; } return kasprintf(GFP_KERNEL, - "domain=0x%x,offset=0x%x,%s=?,lpar=%s", - domain, + "domain=%s,offset=0x%x,%s=?,lpar=%s", + domain_str, be16_to_cpu(event->event_counter_offs) + be16_to_cpu(event->event_group_record_offs), sindex, @@ -339,6 +383,15 @@ static struct attribute *device_str_attr_create_(char *name, char *str) return &attr->attr.attr; } +/* + * Allocate and initialize strings representing event attributes. + * + * NOTE: The strings allocated here are never destroyed and continue to + * exist till shutdown. This is to allow us to create as many events + * from the catalog as possible, even if we encounter errors with some. + * In case of changes to error paths in future, these may need to be + * freed by the caller. + */ static struct attribute *device_str_attr_create(char *name, int name_max, int name_nonce, char *str, size_t str_max) @@ -370,16 +423,6 @@ out_s: return NULL; } -static void device_str_attr_destroy(struct attribute *attr) -{ - struct dev_ext_attribute *d; - - d = container_of(attr, struct dev_ext_attribute, attr.attr); - kfree(d->var); - kfree(d->attr.attr.name); - kfree(d); -} - static struct attribute *event_to_attr(unsigned ix, struct hv_24x7_event_data *event, unsigned domain, @@ -387,7 +430,6 @@ static struct attribute *event_to_attr(unsigned ix, { int event_name_len; char *ev_name, *a_ev_name, *val; - const char *ev_suffix; struct attribute *attr; if (!domain_is_valid(domain)) { @@ -400,14 +442,13 @@ static struct attribute *event_to_attr(unsigned ix, if (!val) return NULL; - ev_suffix = event_domain_suffix(domain); ev_name = event_name(event, &event_name_len); if (!nonce) - a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s", - (int)event_name_len, ev_name, ev_suffix); + a_ev_name = kasprintf(GFP_KERNEL, "%.*s", + (int)event_name_len, ev_name); else - a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d", - (int)event_name_len, ev_name, ev_suffix, nonce); + a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d", + (int)event_name_len, ev_name, nonce); if (!a_ev_name) goto out_val; @@ -452,45 +493,14 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) return device_str_attr_create(name, nl, nonce, desc, dl); } -static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs, +static int event_data_to_attrs(unsigned ix, struct attribute **attrs, struct hv_24x7_event_data *event, int nonce) { - unsigned i; - - switch (event->domain) { - case HV_PERF_DOMAIN_PHYS_CHIP: - *attrs = event_to_attr(ix, event, event->domain, nonce); - return 1; - case HV_PERF_DOMAIN_PHYS_CORE: - for (i = 0; i < ARRAY_SIZE(core_domains); i++) { - attrs[i] = event_to_attr(ix, event, core_domains[i], - nonce); - if (!attrs[i]) { - pr_warn("catalog event %u: individual attr %u " - "creation failure\n", ix, i); - for (; i; i--) - device_str_attr_destroy(attrs[i - 1]); - return -1; - } - } - return i; - default: - pr_warn("catalog event %u: domain %u is not allowed in the " - "catalog\n", ix, event->domain); + *attrs = event_to_attr(ix, event, event->domain, nonce); + if (!*attrs) return -1; - } -} -static size_t event_to_attr_ct(struct hv_24x7_event_data *event) -{ - switch (event->domain) { - case HV_PERF_DOMAIN_PHYS_CHIP: - return 1; - case HV_PERF_DOMAIN_PHYS_CORE: - return ARRAY_SIZE(core_domains); - default: - return 0; - } + return 0; } static unsigned long vmalloc_to_phys(void *v) @@ -726,9 +736,8 @@ static int create_events_from_catalog(struct attribute ***events_, goto e_free; } - if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) { - pr_err("event_entry_count %zu is invalid\n", - event_entry_count); + if (SIZE_MAX - 1 < event_entry_count) { + pr_err("event_entry_count %zu is invalid\n", event_entry_count); ret = -EIO; goto e_free; } @@ -801,7 +810,7 @@ static int create_events_from_catalog(struct attribute ***events_, continue; } - attr_max += event_to_attr_ct(event); + attr_max++; } event_idx_last = event_idx; @@ -851,12 +860,12 @@ static int create_events_from_catalog(struct attribute ***events_, nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); ct = event_data_to_attrs(event_idx, events + event_attr_ct, event, nonce); - if (ct <= 0) { + if (ct < 0) { pr_warn("event %zu (%.*s) creation failure, skipping\n", event_idx, nl, name); junk_events++; } else { - event_attr_ct += ct; + event_attr_ct++; event_descs[desc_ct] = event_to_desc_attr(event, nonce); if (event_descs[desc_ct]) desc_ct++; @@ -961,6 +970,27 @@ e_free: return ret; } +static ssize_t domains_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + int d, n, count = 0; + const char *str; + + for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) { + str = domain_name(d); + if (!str) + continue; + + n = sprintf(page, "%d: %s\n", d, str); + if (n < 0) + break; + + count += n; + page += n; + } + return count; +} + #define PAGE_0_ATTR(_name, _fmt, _expr) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *dev_attr, \ @@ -989,6 +1019,7 @@ PAGE_0_ATTR(catalog_version, "%lld\n", PAGE_0_ATTR(catalog_len, "%lld\n", (unsigned long long)be32_to_cpu(page_0->length) * 4096); static BIN_ATTR_RO(catalog, 0/* real length varies */); +static DEVICE_ATTR_RO(domains); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, @@ -998,6 +1029,7 @@ static struct bin_attribute *if_bin_attrs[] = { static struct attribute *if_attrs[] = { &dev_attr_catalog_len.attr, &dev_attr_catalog_version.attr, + &dev_attr_domains.attr, NULL, }; @@ -1089,10 +1121,16 @@ static int add_event_to_24x7_request(struct perf_event *event, return -EINVAL; } - if (is_physical_domain(event_get_domain(event))) + switch (event_get_domain(event)) { + case HV_PERF_DOMAIN_PHYS_CHIP: + idx = event_get_chip(event); + break; + case HV_PERF_DOMAIN_PHYS_CORE: idx = event_get_core(event); - else + break; + default: idx = event_get_vcpu(event); + } i = request_buffer->num_requests++; req = &request_buffer->requests[i]; @@ -1208,11 +1246,12 @@ static int h_24x7_event_init(struct perf_event *event) return -EACCES; } - /* see if the event complains */ + /* Get the initial value of the counter for this event */ if (single_24x7_request(event, &ct)) { pr_devel("test hcall failed\n"); return -EIO; } + (void)local64_xchg(&event->hw.prev_count, ct); return 0; } @@ -1275,6 +1314,16 @@ static void h_24x7_event_read(struct perf_event *event) h24x7hw = &get_cpu_var(hv_24x7_hw); h24x7hw->events[i] = event; put_cpu_var(h24x7hw); + /* + * Clear the event count so we can compute the _change_ + * in the 24x7 raw counter value at the end of the txn. + * + * Note that we could alternatively read the 24x7 value + * now and save its value in event->hw.prev_count. But + * that would require issuing a hcall, which would then + * defeat the purpose of using the txn interface. + */ + local64_set(&event->count, 0); } put_cpu_var(hv_24x7_reqb); diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index c57d67dc9f3f..791455e7f5cf 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -7,6 +7,7 @@ enum hv_perf_domains { #define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v, #include "hv-24x7-domains.h" #undef DOMAIN + HV_PERF_DOMAIN_MAX, }; struct hv_24x7_request { diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 5b62f2389290..a383c23a9070 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -54,7 +54,7 @@ * Power7 event codes. */ #define EVENT(_name, _code) \ - PME_##_name = _code, + _name = _code, enum { #include "power7-events-list.h" @@ -318,14 +318,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) } static int power7_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL, - [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL, - [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1, - [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN, - [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED, + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_GCT_NOSLOT_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED, }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/perf/power8-events-list.h b/arch/powerpc/perf/power8-events-list.h new file mode 100644 index 000000000000..741b77edd03e --- /dev/null +++ b/arch/powerpc/perf/power8-events-list.h @@ -0,0 +1,51 @@ +/* + * Performance counter support for POWER8 processors. + * + * Copyright 2014 Sukadev Bhattiprolu, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Power8 event codes. + */ +EVENT(PM_CYC, 0x0001e) +EVENT(PM_GCT_NOSLOT_CYC, 0x100f8) +EVENT(PM_CMPLU_STALL, 0x4000a) +EVENT(PM_INST_CMPL, 0x00002) +EVENT(PM_BRU_FIN, 0x10068) +EVENT(PM_BR_MPRED_CMPL, 0x400f6) + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100ee) +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1, 0x3e054) +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0) +/* L1 cache data prefetches */ +EVENT(PM_L1_PREF, 0x0d8b8) +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080) +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fd) +/* Instruction Demand sectors wriittent into IL1 */ +EVENT(PM_L1_DEMAND_WRITE, 0x0408c) +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_WRITE, 0x0408e) +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x4c042) +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe) +/* All successful D-side store dispatches for this thread */ +EVENT(PM_L2_ST, 0x17080) +/* All successful D-side store dispatches for this thread that were L2 Miss */ +EVENT(PM_L2_ST_MISS, 0x17082) +/* Total HW L3 prefetches(Load+store) */ +EVENT(PM_L3_PREF_ALL, 0x4e052) +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc) +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index be9b7aec216f..690d9186a855 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -17,48 +17,16 @@ #include <asm/firmware.h> #include <asm/cputable.h> - /* * Some power8 event codes. */ -#define PM_CYC 0x0001e -#define PM_GCT_NOSLOT_CYC 0x100f8 -#define PM_CMPLU_STALL 0x4000a -#define PM_INST_CMPL 0x00002 -#define PM_BRU_FIN 0x10068 -#define PM_BR_MPRED_CMPL 0x400f6 - -/* All L1 D cache load references counted at finish, gated by reject */ -#define PM_LD_REF_L1 0x100ee -/* Load Missed L1 */ -#define PM_LD_MISS_L1 0x3e054 -/* Store Missed L1 */ -#define PM_ST_MISS_L1 0x300f0 -/* L1 cache data prefetches */ -#define PM_L1_PREF 0x0d8b8 -/* Instruction fetches from L1 */ -#define PM_INST_FROM_L1 0x04080 -/* Demand iCache Miss */ -#define PM_L1_ICACHE_MISS 0x200fd -/* Instruction Demand sectors wriittent into IL1 */ -#define PM_L1_DEMAND_WRITE 0x0408c -/* Instruction prefetch written into IL1 */ -#define PM_IC_PREF_WRITE 0x0408e -/* The data cache was reloaded from local core's L3 due to a demand load */ -#define PM_DATA_FROM_L3 0x4c042 -/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ -#define PM_DATA_FROM_L3MISS 0x300fe -/* All successful D-side store dispatches for this thread */ -#define PM_L2_ST 0x17080 -/* All successful D-side store dispatches for this thread that were L2 Miss */ -#define PM_L2_ST_MISS 0x17082 -/* Total HW L3 prefetches(Load+store) */ -#define PM_L3_PREF_ALL 0x4e052 -/* Data PTEG reload */ -#define PM_DTLB_MISS 0x300fc -/* ITLB Reloaded */ -#define PM_ITLB_MISS 0x400fc +#define EVENT(_name, _code) _name = _code, + +enum { +#include "power8-events-list.h" +}; +#undef EVENT /* * Raw event encoding for POWER8: @@ -604,6 +572,71 @@ static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[]) mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_FIN); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); + +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE); + +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL); +CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); +CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); + +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BRU_FIN); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power8_events_attr[] = { + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC), + GENERIC_EVENT_PTR(PM_CMPLU_STALL), + GENERIC_EVENT_PTR(PM_INST_CMPL), + GENERIC_EVENT_PTR(PM_BRU_FIN), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_L1_PREF), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_WRITE), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_L3_PREF_ALL), + CACHE_EVENT_PTR(PM_L2_ST_MISS), + CACHE_EVENT_PTR(PM_L2_ST), + + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BRU_FIN), + + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power8_pmu_events_group = { + .name = "events", + .attrs = power8_events_attr, +}; + PMU_FORMAT_ATTR(event, "config:0-49"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); PMU_FORMAT_ATTR(mark, "config:8"); @@ -640,6 +673,7 @@ struct attribute_group power8_pmu_format_group = { static const struct attribute_group *power8_pmu_attr_groups[] = { &power8_pmu_format_group, + &power8_pmu_events_group, NULL, }; diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 52c6ce1cc985..1eb7b45e017d 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -2,7 +2,7 @@ CFLAGS_bootx_init.o += -fPIC ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_bootx_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) endif obj-y += pic.o setup.o time.o feature.o pci.o \ diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 811917219bf1..950b3e539057 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -371,6 +371,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) edev->mode &= 0xFFFFFF00; edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; @@ -879,6 +880,120 @@ void pnv_pci_reset_secondary_bus(struct pci_dev *dev) } } +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type, + int pos, u16 mask) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int i, status = 0; + + /* Wait for Transaction Pending bit to be cleared */ + for (i = 0; i < 4; i++) { + eeh_ops->read_config(pdn, pos, 2, &status); + if (!(status & mask)) + return; + + msleep((1 << i) * 100); + } + + pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n", + __func__, type, + edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); +} + +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 reg = 0; + + if (WARN_ON(!edev->pcie_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + if (!(reg & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + pnv_eeh_wait_for_pending(pdn, "", + edev->pcie_cap + PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_TRPND); + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg |= PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 cap = 0; + + if (WARN_ON(!edev->af_cap)) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Wait for Transaction Pending bit to clear. A word-aligned + * test is used, so we use the conrol offset rather than status + * and shift the test bit to match. + */ + pnv_eeh_wait_for_pending(pdn, "AF", + edev->af_cap + PCI_AF_CTRL, + PCI_AF_STATUS_TP << 8); + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + 1, PCI_AF_CTRL_FLR); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option) +{ + struct eeh_dev *edev; + struct pci_dn *pdn; + int ret; + + /* The VF PE should have only one child device */ + edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list); + pdn = eeh_dev_to_pdn(edev); + if (!pdn) + return -ENXIO; + + ret = pnv_eeh_do_flr(pdn, option); + if (!ret) + return ret; + + return pnv_eeh_do_af_flr(pdn, option); +} + /** * pnv_eeh_reset - Reset the specified PE * @pe: EEH PE @@ -940,7 +1055,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) } bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus) || + if (pe->type & EEH_PE_VF) + ret = pnv_eeh_reset_vf_pe(pe, option); + else if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) ret = pnv_eeh_root_reset(hose, option); else @@ -1079,6 +1196,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) if (!edev || !edev->pe) return false; + /* + * We will issue FLR or AF FLR to all VFs, which are contained + * in VF PE. It relies on the EEH PCI config accessors. So we + * can't block them during the window. + */ + if (edev->physfn && (edev->pe->state & EEH_PE_RESET)) + return false; + if (edev->pe->state & EEH_PE_CFG_BLOCKED) return true; @@ -1463,6 +1588,65 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) return ret; } +static int pnv_eeh_restore_vf_config(struct pci_dn *pdn) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 devctl, cmd, cap2, aer_capctl; + int old_mps; + + if (edev->pcie_cap) { + /* Restore MPS */ + old_mps = (ffs(pdn->mps) - 8) << 5; + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + devctl |= old_mps; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + + /* Disable Completion Timeout */ + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, + 4, &cap2); + if (cap2 & 0x10) { + eeh_ops->read_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, &cap2); + cap2 |= 0x10; + eeh_ops->write_config(pdn, + edev->pcie_cap + PCI_EXP_DEVCTL2, + 4, cap2); + } + } + + /* Enable SERR and parity checking */ + eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); + cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); + eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); + + /* Enable report various errors */ + if (edev->pcie_cap) { + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, &devctl); + devctl &= ~PCI_EXP_DEVCTL_CERE; + devctl |= (PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 2, devctl); + } + + /* Enable ECRC generation and check */ + if (edev->pcie_cap && edev->aer_cap) { + eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, &aer_capctl); + aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); + eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, + 4, aer_capctl); + } + + return 0; +} + static int pnv_eeh_restore_config(struct pci_dn *pdn) { struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -1472,9 +1656,21 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn) if (!edev) return -EEXIST; - phb = edev->phb->private_data; - ret = opal_pci_reinit(phb->opal_id, - OPAL_REINIT_PCI_DEV, edev->config_addr); + /* + * We have to restore the PCI config space after reset since the + * firmware can't see SRIOV VFs. + * + * FIXME: The MPS, error routing rules, timeout setting are worthy + * to be exported by firmware in extendible way. + */ + if (edev->physfn) { + ret = pnv_eeh_restore_vf_config(pdn); + } else { + phb = edev->phb->private_data; + ret = opal_pci_reinit(phb->opal_id, + OPAL_REINIT_PCI_DEV, edev->config_addr); + } + if (ret) { pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", __func__, edev->config_addr, ret); @@ -1503,6 +1699,40 @@ static struct eeh_ops pnv_eeh_ops = { .restore_config = pnv_eeh_restore_config }; +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + + if (!pdev->is_virtfn) + return; + + /* + * The following operations will fail if VF's sysfs files + * aren't created or its resources aren't finalized. + */ + eeh_add_device_early(pdn); + eeh_add_device_late(pdev); + eeh_sysfs_add_device(pdev); +} + +#ifdef CONFIG_PCI_IOV +static void pnv_pci_fixup_vf_mps(struct pci_dev *pdev) +{ + struct pci_dn *pdn = pci_get_pdn(pdev); + int parent_mps; + + if (!pdev->is_virtfn) + return; + + /* Synchronize MPS for VF and PF */ + parent_mps = pcie_get_mps(pdev->physfn); + if ((128 << pdev->pcie_mpss) >= parent_mps) + pcie_set_mps(pdev, parent_mps); + pdn->mps = pcie_get_mps(pdev); +} +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_pci_fixup_vf_mps); +#endif /* CONFIG_PCI_IOV */ + /** * eeh_powernv_init - Register platform dependent EEH operations * diff --git a/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh b/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh new file mode 100755 index 000000000000..c658d8cf760b --- /dev/null +++ b/arch/powerpc/scripts/gcc-check-mprofile-kernel.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e +set -o pipefail + +# To debug, uncomment the following line +# set -x + +# Test whether the compile option -mprofile-kernel exists and generates +# profiling code (ie. a call to _mcount()). +echo "int func() { return 0; }" | \ + $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \ + grep -q "_mcount" + +# Test whether the notrace attribute correctly suppresses calls to _mcount(). + +echo -e "#include <linux/compiler.h>\nnotrace int func() { return 0; }" | \ + $* -S -x c -O2 -p -mprofile-kernel - -o - 2> /dev/null | \ + grep -q "_mcount" && \ + exit 1 + +echo "OK" +exit 0 diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index be2ac5ce349f..8a55c1aa11aa 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o cxl-y += vphb.o api.o +cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index ea3eeb7011e1..2107c948406d 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -51,8 +51,6 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) if (rc) goto err_mapping; - cxl_assign_psn_space(ctx); - return ctx; err_mapping: @@ -78,7 +76,6 @@ struct device *cxl_get_phys_dev(struct pci_dev *dev) return afu->adapter->dev.parent; } -EXPORT_SYMBOL_GPL(cxl_get_phys_dev); int cxl_release_context(struct cxl_context *ctx) { @@ -91,28 +88,11 @@ int cxl_release_context(struct cxl_context *ctx) } EXPORT_SYMBOL_GPL(cxl_release_context); -int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) -{ - if (num == 0) - num = ctx->afu->pp_irqs; - return afu_allocate_irqs(ctx, num); -} -EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); - -void cxl_free_afu_irqs(struct cxl_context *ctx) -{ - afu_irq_name_free(ctx); - cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); -} -EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); - static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) { __u16 range; int r; - WARN_ON(num == 0); - for (r = 0; r < CXL_IRQ_RANGES; r++) { range = ctx->irqs.range[r]; if (num < range) { @@ -123,6 +103,44 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } +int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) +{ + int res; + irq_hw_number_t hwirq; + + if (num == 0) + num = ctx->afu->pp_irqs; + res = afu_allocate_irqs(ctx, num); + if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) { + /* In a guest, the PSL interrupt is not multiplexed. It was + * allocated above, and we need to set its handler + */ + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) + cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); + } + return res; +} +EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); + +void cxl_free_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { + hwirq = cxl_find_afu_irq(ctx, 0); + if (hwirq) { + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, ctx); + } + } + afu_irq_name_free(ctx); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); +} +EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); + int cxl_map_afu_irq(struct cxl_context *ctx, int num, irq_handler_t handler, void *cookie, char *name) { @@ -178,7 +196,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, cxl_ctx_get(); - if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) { + if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { put_pid(ctx->pid); cxl_ctx_put(); goto out; @@ -193,7 +211,7 @@ EXPORT_SYMBOL_GPL(cxl_start_context); int cxl_process_element(struct cxl_context *ctx) { - return ctx->pe; + return ctx->external_pe; } EXPORT_SYMBOL_GPL(cxl_process_element); @@ -207,7 +225,6 @@ EXPORT_SYMBOL_GPL(cxl_stop_context); void cxl_set_master(struct cxl_context *ctx) { ctx->master = true; - cxl_assign_psn_space(ctx); } EXPORT_SYMBOL_GPL(cxl_set_master); @@ -325,15 +342,11 @@ EXPORT_SYMBOL_GPL(cxl_start_work); void __iomem *cxl_psa_map(struct cxl_context *ctx) { - struct cxl_afu *afu = ctx->afu; - int rc; - - rc = cxl_afu_check_and_enable(afu); - if (rc) + if (ctx->status != STARTED) return NULL; pr_devel("%s: psn_phys%llx size:%llx\n", - __func__, afu->psn_phys, afu->adapter->ps_size); + __func__, ctx->psn_phys, ctx->psn_size); return ioremap(ctx->psn_phys, ctx->psn_size); } EXPORT_SYMBOL_GPL(cxl_psa_map); @@ -349,11 +362,11 @@ int cxl_afu_reset(struct cxl_context *ctx) struct cxl_afu *afu = ctx->afu; int rc; - rc = __cxl_afu_reset(afu); + rc = cxl_ops->afu_reset(afu); if (rc) return rc; - return cxl_afu_check_and_enable(afu); + return cxl_ops->afu_check_and_enable(afu); } EXPORT_SYMBOL_GPL(cxl_afu_reset); @@ -363,3 +376,11 @@ void cxl_perst_reloads_same_image(struct cxl_afu *afu, afu->adapter->perst_same_image = perst_reloads_same_image; } EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); + +ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + + return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); +} +EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index a9f0dd3255a2..9b90ec6c07cd 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -11,6 +11,7 @@ #include <linux/rcupdate.h> #include <asm/errno.h> #include <misc/cxl-base.h> +#include <linux/of_platform.h> #include "cxl.h" /* protected by rcu */ @@ -84,3 +85,34 @@ void unregister_cxl_calls(struct cxl_calls *calls) synchronize_rcu(); } EXPORT_SYMBOL_GPL(unregister_cxl_calls); + +int cxl_update_properties(struct device_node *dn, + struct property *new_prop) +{ + return of_update_property(dn, new_prop); +} +EXPORT_SYMBOL_GPL(cxl_update_properties); + +static int __init cxl_base_init(void) +{ + struct device_node *np = NULL; + struct platform_device *dev; + int count = 0; + + /* + * Scan for compatible devices in guest only + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + while ((np = of_find_compatible_node(np, NULL, + "ibm,coherent-platform-facility"))) { + dev = of_platform_device_create(np, NULL, NULL); + if (dev) + count++; + } + pr_devel("Found %d cxl device(s)\n", count); + return 0; +} + +module_init(cxl_base_init); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 262b88eac414..10370f280500 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -95,7 +95,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, return i; ctx->pe = i; - ctx->elem = &ctx->afu->spa[i]; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + ctx->elem = &ctx->afu->native->spa[i]; + ctx->external_pe = ctx->pe; + } else { + ctx->external_pe = -1; /* assigned when attaching */ + } ctx->pe_inserted = false; /* @@ -214,8 +219,8 @@ int __detach_context(struct cxl_context *ctx) /* Only warn if we detached while the link was OK. * If detach fails when hw is down, we don't care. */ - WARN_ON(cxl_detach_process(ctx) && - cxl_adapter_link_ok(ctx->afu->adapter)); + WARN_ON(cxl_ops->detach_process(ctx) && + cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ /* release the reference to the group leader and mm handling pid */ diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index a521bc72cec2..38e21cf7806e 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -324,6 +324,10 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_MODE_TIME_SLICED 0x4 #define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) +#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ +#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) +#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) + enum cxl_context_status { CLOSED, OPENED, @@ -336,6 +340,12 @@ enum prefault_modes { CXL_PREFAULT_ALL, }; +enum cxl_attrs { + CXL_ADAPTER_ATTRS, + CXL_AFU_MASTER_ATTRS, + CXL_AFU_ATTRS, +}; + struct cxl_sste { __be64 esid_data; __be64 vsid_data; @@ -344,18 +354,46 @@ struct cxl_sste { #define to_cxl_adapter(d) container_of(d, struct cxl, dev) #define to_cxl_afu(d) container_of(d, struct cxl_afu, dev) -struct cxl_afu { +struct cxl_afu_native { + void __iomem *p1n_mmio; + void __iomem *afu_desc_mmio; irq_hw_number_t psl_hwirq; + unsigned int psl_virq; + struct mutex spa_mutex; + /* + * Only the first part of the SPA is used for the process element + * linked list. The only other part that software needs to worry about + * is sw_command_status, which we store a separate pointer to. + * Everything else in the SPA is only used by hardware + */ + struct cxl_process_element *spa; + __be64 *sw_command_status; + unsigned int spa_size; + int spa_order; + int spa_max_procs; + u64 pp_offset; +}; + +struct cxl_afu_guest { + u64 handle; + phys_addr_t p2n_phys; + u64 p2n_size; + int max_ints; + struct mutex recovery_lock; + int previous_state; +}; + +struct cxl_afu { + struct cxl_afu_native *native; + struct cxl_afu_guest *guest; irq_hw_number_t serr_hwirq; - char *err_irq_name; - char *psl_irq_name; unsigned int serr_virq; - void __iomem *p1n_mmio; + char *psl_irq_name; + char *err_irq_name; void __iomem *p2n_mmio; phys_addr_t psn_phys; - u64 pp_offset; u64 pp_size; - void __iomem *afu_desc_mmio; + struct cxl *adapter; struct device dev; struct cdev afu_cdev_s, afu_cdev_m, afu_cdev_d; @@ -363,26 +401,12 @@ struct cxl_afu { struct idr contexts_idr; struct dentry *debugfs; struct mutex contexts_lock; - struct mutex spa_mutex; spinlock_t afu_cntl_lock; /* AFU error buffer fields and bin attribute for sysfs */ u64 eb_len, eb_offset; struct bin_attribute attr_eb; - /* - * Only the first part of the SPA is used for the process element - * linked list. The only other part that software needs to worry about - * is sw_command_status, which we store a separate pointer to. - * Everything else in the SPA is only used by hardware - */ - struct cxl_process_element *spa; - __be64 *sw_command_status; - unsigned int spa_size; - int spa_order; - int spa_max_procs; - unsigned int psl_virq; - /* pointer to the vphb */ struct pci_controller *phb; @@ -421,6 +445,12 @@ struct cxl_irq_name { char *name; }; +struct irq_avail { + irq_hw_number_t offset; + irq_hw_number_t range; + unsigned long *bitmap; +}; + /* * This is a cxl context. If the PSL is in dedicated mode, there will be one * of these per AFU. If in AFU directed there can be lots of these. @@ -476,7 +506,19 @@ struct cxl_context { struct cxl_process_element *elem; - int pe; /* process element handle */ + /* + * pe is the process element handle, assigned by this driver when the + * context is initialized. + * + * external_pe is the PE shown outside of cxl. + * On bare-metal, pe=external_pe, because we decide what the handle is. + * In a guest, we only find out about the pe used by pHyp when the + * context is attached, and that's the value we want to report outside + * of cxl. + */ + int pe; + int external_pe; + u32 irq_count; bool pe_inserted; bool master; @@ -488,11 +530,34 @@ struct cxl_context { struct rcu_head rcu; }; -struct cxl { +struct cxl_native { + u64 afu_desc_off; + u64 afu_desc_size; void __iomem *p1_mmio; void __iomem *p2_mmio; irq_hw_number_t err_hwirq; unsigned int err_virq; + u64 ps_off; +}; + +struct cxl_guest { + struct platform_device *pdev; + int irq_nranges; + struct cdev cdev; + irq_hw_number_t irq_base_offset; + struct irq_avail *irq_avail; + spinlock_t irq_alloc_lock; + u64 handle; + char *status; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; +}; + +struct cxl { + struct cxl_native *native; + struct cxl_guest *guest; spinlock_t afu_list_lock; struct cxl_afu *afu[CXL_MAX_SLICES]; struct device dev; @@ -503,9 +568,6 @@ struct cxl { struct bin_attribute cxl_attr; int adapter_num; int user_irqs; - u64 afu_desc_off; - u64 afu_desc_size; - u64 ps_off; u64 ps_size; u16 psl_rev; u16 base_image; @@ -519,13 +581,15 @@ struct cxl { bool perst_same_image; }; -int cxl_alloc_one_irq(struct cxl *adapter); -void cxl_release_one_irq(struct cxl *adapter, int hwirq); -int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); -void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); -int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); +int cxl_pci_alloc_one_irq(struct cxl *adapter); +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq); +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); int cxl_update_image_control(struct cxl *adapter); -int cxl_reset(struct cxl *adapter); +int cxl_pci_reset(struct cxl *adapter); +void cxl_pci_release_afu(struct device *dev); +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); /* common == phyp + powernv */ struct cxl_process_element_common { @@ -555,29 +619,32 @@ struct cxl_process_element { __be32 software_state; } __packed; -static inline bool cxl_adapter_link_ok(struct cxl *cxl) +static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu) { struct pci_dev *pdev; - pdev = to_pci_dev(cxl->dev.parent); - return !pci_channel_offline(pdev); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + pdev = to_pci_dev(cxl->dev.parent); + return !pci_channel_offline(pdev); + } + return true; } static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); - return cxl->p1_mmio + cxl_reg_off(reg); + return cxl->native->p1_mmio + cxl_reg_off(reg); } static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val) { - if (likely(cxl_adapter_link_ok(cxl))) + if (likely(cxl_adapter_link_ok(cxl, NULL))) out_be64(_cxl_p1_addr(cxl, reg), val); } static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) { - if (likely(cxl_adapter_link_ok(cxl))) + if (likely(cxl_adapter_link_ok(cxl, NULL))) return in_be64(_cxl_p1_addr(cxl, reg)); else return ~0ULL; @@ -586,18 +653,18 @@ static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg) static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg) { WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE)); - return afu->p1n_mmio + cxl_reg_off(reg); + return afu->native->p1n_mmio + cxl_reg_off(reg); } static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) out_be64(_cxl_p1n_addr(afu, reg), val); } static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) return in_be64(_cxl_p1n_addr(afu, reg)); else return ~0ULL; @@ -610,39 +677,19 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) out_be64(_cxl_p2n_addr(afu, reg), val); } static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) { - if (likely(cxl_adapter_link_ok(afu->adapter))) + if (likely(cxl_adapter_link_ok(afu->adapter, afu))) return in_be64(_cxl_p2n_addr(afu, reg)); else return ~0ULL; } -static inline u64 cxl_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off) -{ - if (likely(cxl_adapter_link_ok(afu->adapter))) - return in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + - ((cr) * (afu)->crs_len) + (off)); - else - return ~0ULL; -} - -static inline u32 cxl_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off) -{ - if (likely(cxl_adapter_link_ok(afu->adapter))) - return in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + - ((cr) * (afu)->crs_len) + (off)); - else - return 0xffffffff; -} -u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); -u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); - -ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); @@ -652,13 +699,14 @@ struct cxl_calls { }; int register_cxl_calls(struct cxl_calls *calls); void unregister_cxl_calls(struct cxl_calls *calls); +int cxl_update_properties(struct device_node *dn, struct property *new_prop); -int cxl_alloc_adapter_nr(struct cxl *adapter); void cxl_remove_adapter_nr(struct cxl *adapter); int cxl_alloc_spa(struct cxl_afu *afu); void cxl_release_spa(struct cxl_afu *afu); +dev_t cxl_get_dev(void); int cxl_file_init(void); void cxl_file_exit(void); int cxl_register_adapter(struct cxl *adapter); @@ -679,21 +727,19 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu); int cxl_sysfs_afu_m_add(struct cxl_afu *afu); void cxl_sysfs_afu_m_remove(struct cxl_afu *afu); -int cxl_afu_activate_mode(struct cxl_afu *afu, int mode); -int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode); -int cxl_afu_deactivate_mode(struct cxl_afu *afu); +struct cxl *cxl_alloc_adapter(void); +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice); int cxl_afu_select_best_mode(struct cxl_afu *afu); -int cxl_register_psl_irq(struct cxl_afu *afu); -void cxl_release_psl_irq(struct cxl_afu *afu); -int cxl_register_psl_err_irq(struct cxl *adapter); -void cxl_release_psl_err_irq(struct cxl *adapter); -int cxl_register_serr_irq(struct cxl_afu *afu); -void cxl_release_serr_irq(struct cxl_afu *afu); +int cxl_native_register_psl_irq(struct cxl_afu *afu); +void cxl_native_release_psl_irq(struct cxl_afu *afu); +int cxl_native_register_psl_err_irq(struct cxl *adapter); +void cxl_native_release_psl_err_irq(struct cxl *adapter); +int cxl_native_register_serr_irq(struct cxl_afu *afu); +void cxl_native_release_serr_irq(struct cxl_afu *afu); int afu_register_irqs(struct cxl_context *ctx, u32 count); void afu_release_irqs(struct cxl_context *ctx, void *cookie); void afu_irq_name_free(struct cxl_context *ctx); -irqreturn_t cxl_slice_irq_err(int irq, void *data); int cxl_debugfs_init(void); void cxl_debugfs_exit(void); @@ -707,6 +753,7 @@ void cxl_prefault(struct cxl_context *ctx, u64 wed); struct cxl *get_cxl_adapter(int num); int cxl_alloc_sst(struct cxl_context *ctx); +void cxl_dump_debug_buffer(void *addr, size_t size); void init_cxl_native(void); @@ -720,40 +767,54 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, void cxl_unmap_irq(unsigned int virq, void *cookie); int __detach_context(struct cxl_context *ctx); -/* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */ +/* + * This must match the layout of the H_COLLECT_CA_INT_INFO retbuf defined + * in PAPR. + * A word about endianness: a pointer to this structure is passed when + * calling the hcall. However, it is not a block of memory filled up by + * the hypervisor. The return values are found in registers, and copied + * one by one when returning from the hcall. See the end of the call to + * plpar_hcall9() in hvCall.S + * As a consequence: + * - we don't need to do any endianness conversion + * - the pid and tid are an exception. They are 32-bit values returned in + * the same 64-bit register. So we do need to worry about byte ordering. + */ struct cxl_irq_info { u64 dsisr; u64 dar; u64 dsr; +#ifndef CONFIG_CPU_LITTLE_ENDIAN u32 pid; u32 tid; +#else + u32 tid; + u32 pid; +#endif u64 afu_err; u64 errstat; - u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */ + u64 proc_handle; + u64 padding[2]; /* to match the expected retbuf size for plpar_hcall9 */ }; void cxl_assign_psn_space(struct cxl_context *ctx); -int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, - u64 amr); -int cxl_detach_process(struct cxl_context *ctx); - -int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info); -int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); +irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info); +int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, + void *cookie, irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, const char *name); int cxl_check_error(struct cxl_afu *afu); int cxl_afu_slbia(struct cxl_afu *afu); int cxl_tlb_slb_invalidate(struct cxl *adapter); int cxl_afu_disable(struct cxl_afu *afu); -int __cxl_afu_reset(struct cxl_afu *afu); -int cxl_afu_check_and_enable(struct cxl_afu *afu); int cxl_psl_purge(struct cxl_afu *afu); void cxl_stop_trace(struct cxl *cxl); int cxl_pci_vphb_add(struct cxl_afu *afu); -void cxl_pci_vphb_reconfigure(struct cxl_afu *afu); void cxl_pci_vphb_remove(struct cxl_afu *afu); extern struct pci_driver cxl_pci_driver; +extern struct platform_driver cxl_of_driver; int afu_allocate_irqs(struct cxl_context *ctx, u32 count); int afu_open(struct inode *inode, struct file *file); @@ -764,4 +825,61 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll); ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off); extern const struct file_operations afu_fops; +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *dev); +void cxl_guest_remove_adapter(struct cxl *adapter); +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np); +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np); +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len); +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len); +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np); +void cxl_guest_remove_afu(struct cxl_afu *afu); +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *afu_np); +int cxl_guest_add_chardev(struct cxl *adapter); +void cxl_guest_remove_chardev(struct cxl *adapter); +void cxl_guest_reload_module(struct cxl *adapter); +int cxl_of_probe(struct platform_device *pdev); + +struct cxl_backend_ops { + struct module *module; + int (*adapter_reset)(struct cxl *adapter); + int (*alloc_one_irq)(struct cxl *adapter); + void (*release_one_irq)(struct cxl *adapter, int hwirq); + int (*alloc_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num); + void (*release_irq_ranges)(struct cxl_irq_ranges *irqs, + struct cxl *adapter); + int (*setup_irq)(struct cxl *adapter, unsigned int hwirq, + unsigned int virq); + irqreturn_t (*handle_psl_slice_error)(struct cxl_context *ctx, + u64 dsisr, u64 errstat); + irqreturn_t (*psl_interrupt)(int irq, void *data); + int (*ack_irq)(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); + int (*attach_process)(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr); + int (*detach_process)(struct cxl_context *ctx); + bool (*support_attributes)(const char *attr_name, enum cxl_attrs type); + bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu); + void (*release_afu)(struct device *dev); + ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + int (*afu_check_and_enable)(struct cxl_afu *afu); + int (*afu_activate_mode)(struct cxl_afu *afu, int mode); + int (*afu_deactivate_mode)(struct cxl_afu *afu, int mode); + int (*afu_reset)(struct cxl_afu *afu); + int (*afu_cr_read8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 *val); + int (*afu_cr_read16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 *val); + int (*afu_cr_read32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 *val); + int (*afu_cr_read64)(struct cxl_afu *afu, int cr_idx, u64 offset, u64 *val); + int (*afu_cr_write8)(struct cxl_afu *afu, int cr_idx, u64 offset, u8 val); + int (*afu_cr_write16)(struct cxl_afu *afu, int cr_idx, u64 offset, u16 val); + int (*afu_cr_write32)(struct cxl_afu *afu, int cr_idx, u64 offset, u32 val); + ssize_t (*read_adapter_vpd)(struct cxl *adapter, void *buf, size_t count); +}; +extern const struct cxl_backend_ops cxl_native_ops; +extern const struct cxl_backend_ops cxl_guest_ops; +extern const struct cxl_backend_ops *cxl_ops; + +/* check if the given pci_dev is on the the cxl vphb bus */ +bool cxl_pci_is_vphb_device(struct pci_dev *dev); #endif diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 18df6f44af2a..5751899e0c17 100644 --- a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -118,6 +118,10 @@ void cxl_debugfs_afu_remove(struct cxl_afu *afu) int __init cxl_debugfs_init(void) { struct dentry *ent; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + ent = debugfs_create_dir("cxl", NULL); if (IS_ERR(ent)) return PTR_ERR(ent); diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 81c3f75b7330..9a8650bcb042 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -101,7 +101,7 @@ static void cxl_ack_ae(struct cxl_context *ctx) { unsigned long flags; - cxl_ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); spin_lock_irqsave(&ctx->lock, flags); ctx->pending_fault = true; @@ -125,7 +125,7 @@ static int cxl_handle_segment_miss(struct cxl_context *ctx, else { mb(); /* Order seg table write to TFC MMIO write */ - cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); } return IRQ_HANDLED; @@ -163,7 +163,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, local_irq_restore(flags); pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); - cxl_ack_irq(ctx, CXL_PSL_TFC_An_R, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); } /* @@ -254,14 +254,17 @@ void cxl_handle_fault(struct work_struct *fault_work) u64 dar = ctx->dar; struct mm_struct *mm = NULL; - if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || - cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || - cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { - /* Most likely explanation is harmless - a dedicated process - * has detached and these were cleared by the PSL purge, but - * warn about it just in case */ - dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); - return; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || + cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || + cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { + /* Most likely explanation is harmless - a dedicated + * process has detached and these were cleared by the + * PSL purge, but warn about it just in case + */ + dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); + return; + } } /* Early return if the context is being / has been detached */ diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 783337d22f36..eec468f1612f 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -26,9 +26,7 @@ #include "trace.h" #define CXL_NUM_MINORS 256 /* Total to reserve */ -#define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ -#define CXL_CARD_MINOR(adapter) (adapter->adapter_num * CXL_DEV_MINORS) #define CXL_AFU_MINOR_D(afu) (CXL_CARD_MINOR(afu->adapter) + 1 + (3 * afu->slice)) #define CXL_AFU_MINOR_M(afu) (CXL_AFU_MINOR_D(afu) + 1) #define CXL_AFU_MINOR_S(afu) (CXL_AFU_MINOR_D(afu) + 2) @@ -36,7 +34,6 @@ #define CXL_AFU_MKDEV_M(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_M(afu)) #define CXL_AFU_MKDEV_S(afu) MKDEV(MAJOR(cxl_dev), CXL_AFU_MINOR_S(afu)) -#define CXL_DEVT_ADAPTER(dev) (MINOR(dev) / CXL_DEV_MINORS) #define CXL_DEVT_AFU(dev) ((MINOR(dev) % CXL_DEV_MINORS - 1) / 3) #define CXL_DEVT_IS_CARD(dev) (MINOR(dev) % CXL_DEV_MINORS == 0) @@ -79,7 +76,7 @@ static int __afu_open(struct inode *inode, struct file *file, bool master) if (!afu->current_mode) goto err_put_afu; - if (!cxl_adapter_link_ok(adapter)) { + if (!cxl_ops->link_ok(adapter, afu)) { rc = -EIO; goto err_put_afu; } @@ -210,8 +207,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); - if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor, - amr))) { + if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, + amr))) { afu_release_irqs(ctx, ctx); goto out; } @@ -222,12 +219,13 @@ out: mutex_unlock(&ctx->status_mutex); return rc; } + static long afu_ioctl_process_element(struct cxl_context *ctx, int __user *upe) { pr_devel("%s: pe: %i\n", __func__, ctx->pe); - if (copy_to_user(upe, &ctx->pe, sizeof(__u32))) + if (copy_to_user(upe, &ctx->external_pe, sizeof(__u32))) return -EFAULT; return 0; @@ -259,7 +257,7 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (ctx->status == CLOSED) return -EIO; - if (!cxl_adapter_link_ok(ctx->afu->adapter)) + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) return -EIO; pr_devel("afu_ioctl\n"); @@ -289,7 +287,7 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm) if (ctx->status != STARTED) return -EIO; - if (!cxl_adapter_link_ok(ctx->afu->adapter)) + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) return -EIO; return cxl_context_iomap(ctx, vm); @@ -336,7 +334,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, int rc; DEFINE_WAIT(wait); - if (!cxl_adapter_link_ok(ctx->afu->adapter)) + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) return -EIO; if (count < CXL_READ_MIN_SIZE) @@ -349,7 +347,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, if (ctx_event_pending(ctx)) break; - if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { rc = -EIO; goto out; } @@ -445,7 +443,8 @@ static const struct file_operations afu_master_fops = { static char *cxl_devnode(struct device *dev, umode_t *mode) { - if (CXL_DEVT_IS_CARD(dev->devt)) { + if (cpu_has_feature(CPU_FTR_HVMODE) && + CXL_DEVT_IS_CARD(dev->devt)) { /* * These minor numbers will eventually be used to program the * PSL and AFUs once we have dynamic reprogramming support @@ -546,6 +545,11 @@ int cxl_register_adapter(struct cxl *adapter) return device_register(&adapter->dev); } +dev_t cxl_get_dev(void) +{ + return cxl_dev; +} + int __init cxl_file_init(void) { int rc; diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c new file mode 100644 index 000000000000..68dd0b7da471 --- /dev/null +++ b/drivers/misc/cxl/flash.c @@ -0,0 +1,538 @@ +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/semaphore.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <asm/rtas.h> + +#include "cxl.h" +#include "hcalls.h" + +#define DOWNLOAD_IMAGE 1 +#define VALIDATE_IMAGE 2 + +struct ai_header { + u16 version; + u8 reserved0[6]; + u16 vendor; + u16 device; + u16 subsystem_vendor; + u16 subsystem; + u64 image_offset; + u64 image_length; + u8 reserved1[96]; +}; + +static struct semaphore sem; +unsigned long *buffer[CXL_AI_MAX_ENTRIES]; +struct sg_list *le; +static u64 continue_token; +static unsigned int transfer; + +struct update_props_workarea { + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; +} __packed; + +struct update_nodes_workarea { + __be32 state; + __be64 unit_address; + __be32 reserved; +} __packed; + +#define DEVICE_SCOPE 3 +#define NODE_ACTION_MASK 0xff000000 +#define NODE_COUNT_MASK 0x00ffffff +#define OPCODE_DELETE 0x01000000 +#define OPCODE_UPDATE 0x02000000 +#define OPCODE_ADD 0x03000000 + +static int rcall(int token, char *buf, s32 scope) +{ + int rc; + + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope); + memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + return rc; +} + +static int update_property(struct device_node *dn, const char *name, + u32 vd, char *value) +{ + struct property *new_prop; + u32 *val; + int rc; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return -ENOMEM; + + new_prop->name = kstrdup(name, GFP_KERNEL); + if (!new_prop->name) { + kfree(new_prop); + return -ENOMEM; + } + + new_prop->length = vd; + new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); + if (!new_prop->value) { + kfree(new_prop->name); + kfree(new_prop); + return -ENOMEM; + } + memcpy(new_prop->value, value, vd); + + val = (u32 *)new_prop->value; + rc = cxl_update_properties(dn, new_prop); + pr_devel("%s: update property (%s, length: %i, value: %#x)\n", + dn->name, name, vd, be32_to_cpu(*val)); + + if (rc) { + kfree(new_prop->name); + kfree(new_prop->value); + kfree(new_prop); + } + return rc; +} + +static int update_node(__be32 phandle, s32 scope) +{ + struct update_props_workarea *upwa; + struct device_node *dn; + int i, rc, ret; + char *prop_data; + char *buf; + int token; + u32 nprops; + u32 vd; + + token = rtas_token("ibm,update-properties"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); + if (!dn) { + kfree(buf); + return -ENOENT; + } + + upwa = (struct update_props_workarea *)&buf[0]; + upwa->phandle = phandle; + do { + rc = rcall(token, buf, scope); + if (rc < 0) + break; + + prop_data = buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); + + if (*prop_data == 0) { + prop_data++; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += vd + sizeof(vd); + nprops--; + } + + for (i = 0; i < nprops; i++) { + char *prop_name; + + prop_name = prop_data; + prop_data += strlen(prop_name) + 1; + vd = be32_to_cpu(*(__be32 *)prop_data); + prop_data += sizeof(vd); + + if ((vd != 0x00000000) && (vd != 0x80000000)) { + ret = update_property(dn, prop_name, vd, + prop_data); + if (ret) + pr_err("cxl: Could not update property %s - %i\n", + prop_name, ret); + + prop_data += vd; + } + } + } while (rc == 1); + + of_node_put(dn); + kfree(buf); + return rc; +} + +static int update_devicetree(struct cxl *adapter, s32 scope) +{ + struct update_nodes_workarea *unwa; + u32 action, node_count; + int token, rc, i; + __be32 *data, drc_index, phandle; + char *buf; + + token = rtas_token("ibm,update-nodes"); + if (token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + unwa = (struct update_nodes_workarea *)&buf[0]; + unwa->unit_address = cpu_to_be64(adapter->guest->handle); + do { + rc = rcall(token, buf, scope); + if (rc && rc != 1) + break; + + data = (__be32 *)buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { + action = be32_to_cpu(*data) & NODE_ACTION_MASK; + node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; + pr_devel("device reconfiguration - action: %#x, nodes: %#x\n", + action, node_count); + data++; + + for (i = 0; i < node_count; i++) { + phandle = *data++; + + switch (action) { + case OPCODE_DELETE: + /* nothing to do */ + break; + case OPCODE_UPDATE: + update_node(phandle, scope); + break; + case OPCODE_ADD: + /* nothing to do, just move pointer */ + drc_index = *data++; + break; + } + } + } + } while (rc == 1); + + kfree(buf); + return 0; +} + +static int handle_image(struct cxl *adapter, int operation, + long (*fct)(u64, u64, u64, u64 *), + struct cxl_adapter_image *ai) +{ + size_t mod, s_copy, len_chunk = 0; + struct ai_header *header = NULL; + unsigned int entries = 0, i; + void *dest, *from; + int rc = 0, need_header; + + /* base adapter image header */ + need_header = (ai->flags & CXL_AI_NEED_HEADER); + if (need_header) { + header = kzalloc(sizeof(struct ai_header), GFP_KERNEL); + if (!header) + return -ENOMEM; + header->version = cpu_to_be16(1); + header->vendor = cpu_to_be16(adapter->guest->vendor); + header->device = cpu_to_be16(adapter->guest->device); + header->subsystem_vendor = cpu_to_be16(adapter->guest->subsystem_vendor); + header->subsystem = cpu_to_be16(adapter->guest->subsystem); + header->image_offset = cpu_to_be64(CXL_AI_HEADER_SIZE); + header->image_length = cpu_to_be64(ai->len_image); + } + + /* number of entries in the list */ + len_chunk = ai->len_data; + if (need_header) + len_chunk += CXL_AI_HEADER_SIZE; + + entries = len_chunk / CXL_AI_BUFFER_SIZE; + mod = len_chunk % CXL_AI_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > CXL_AI_MAX_ENTRIES) { + rc = -EINVAL; + goto err; + } + + /* < -- MAX_CHUNK_SIZE = 4096 * 256 = 1048576 bytes --> + * chunk 0 ---------------------------------------------------- + * | header | data | + * ---------------------------------------------------- + * chunk 1 ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + * .... + * chunk n ---------------------------------------------------- + * | data | + * ---------------------------------------------------- + */ + from = (void *) ai->data; + for (i = 0; i < entries; i++) { + dest = buffer[i]; + s_copy = CXL_AI_BUFFER_SIZE; + + if ((need_header) && (i == 0)) { + /* add adapter image header */ + memcpy(buffer[i], header, sizeof(struct ai_header)); + s_copy = CXL_AI_BUFFER_SIZE - CXL_AI_HEADER_SIZE; + dest += CXL_AI_HEADER_SIZE; /* image offset */ + } + if ((i == (entries - 1)) && mod) + s_copy = mod; + + /* copy data */ + if (copy_from_user(dest, from, s_copy)) + goto err; + + /* fill in the list */ + le[i].phys_addr = cpu_to_be64(virt_to_phys(buffer[i])); + le[i].len = cpu_to_be64(CXL_AI_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + from += s_copy; + } + pr_devel("%s (op: %i, need header: %i, entries: %i, token: %#llx)\n", + __func__, operation, need_header, entries, continue_token); + + /* + * download/validate the adapter image to the coherent + * platform facility + */ + rc = fct(adapter->guest->handle, virt_to_phys(le), entries, + &continue_token); + if (rc == 0) /* success of download/validation operation */ + continue_token = 0; + +err: + kfree(header); + + return rc; +} + +static int transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image *ai) +{ + int rc = 0; + int afu; + + switch (operation) { + case DOWNLOAD_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_download_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + return rc; + + case VALIDATE_IMAGE: + rc = handle_image(adapter, operation, + &cxl_h_validate_adapter_image, ai); + if (rc < 0) { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + return rc; + } + if (rc == 0) { + pr_devel("remove curent afu\n"); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + + /* The entire image has now been + * downloaded and the validation has + * been successfully performed. + * After that, the partition should call + * ibm,update-nodes and + * ibm,update-properties to receive the + * current configuration + */ + rc = update_devicetree(adapter, DEVICE_SCOPE); + transfer = 1; + } + return rc; + } + + return -EINVAL; +} + +static long ioctl_transfer_image(struct cxl *adapter, int operation, + struct cxl_adapter_image __user *uai) +{ + struct cxl_adapter_image ai; + + pr_devel("%s\n", __func__); + + if (copy_from_user(&ai, uai, sizeof(struct cxl_adapter_image))) + return -EFAULT; + + /* + * Make sure reserved fields and bits are set to 0 + */ + if (ai.reserved1 || ai.reserved2 || ai.reserved3 || ai.reserved4 || + (ai.flags & ~CXL_AI_ALL)) + return -EINVAL; + + return transfer_image(adapter, operation, &ai); +} + +static int device_open(struct inode *inode, struct file *file) +{ + int adapter_num = CXL_DEVT_ADAPTER(inode->i_rdev); + struct cxl *adapter; + int rc = 0, i; + + pr_devel("in %s\n", __func__); + + BUG_ON(sizeof(struct ai_header) != CXL_AI_HEADER_SIZE); + + /* Allows one process to open the device by using a semaphore */ + if (down_interruptible(&sem) != 0) + return -EPERM; + + if (!(adapter = get_cxl_adapter(adapter_num))) + return -ENODEV; + + file->private_data = adapter; + continue_token = 0; + transfer = 0; + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) + buffer[i] = NULL; + + /* aligned buffer containing list entries which describes up to + * 1 megabyte of data (256 entries of 4096 bytes each) + * Logical real address of buffer 0 - Buffer 0 length in bytes + * Logical real address of buffer 1 - Buffer 1 length in bytes + * Logical real address of buffer 2 - Buffer 2 length in bytes + * .... + * .... + * Logical real address of buffer N - Buffer N length in bytes + */ + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err; + } + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + buffer[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!buffer[i]) { + rc = -ENOMEM; + goto err1; + } + } + + return 0; + +err1: + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); +err: + put_device(&adapter->dev); + + return rc; +} + +static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct cxl *adapter = file->private_data; + + pr_devel("in %s\n", __func__); + + if (cmd == CXL_IOCTL_DOWNLOAD_IMAGE) + return ioctl_transfer_image(adapter, + DOWNLOAD_IMAGE, + (struct cxl_adapter_image __user *)arg); + else if (cmd == CXL_IOCTL_VALIDATE_IMAGE) + return ioctl_transfer_image(adapter, + VALIDATE_IMAGE, + (struct cxl_adapter_image __user *)arg); + else + return -EINVAL; +} + +static long device_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return device_ioctl(file, cmd, arg); +} + +static int device_close(struct inode *inode, struct file *file) +{ + struct cxl *adapter = file->private_data; + int i; + + pr_devel("in %s\n", __func__); + + for (i = 0; i < CXL_AI_MAX_ENTRIES; i++) { + if (buffer[i]) + free_page((unsigned long) buffer[i]); + } + + if (le) + free_page((unsigned long) le); + + up(&sem); + put_device(&adapter->dev); + continue_token = 0; + + /* reload the module */ + if (transfer) + cxl_guest_reload_module(adapter); + else { + pr_devel("resetting adapter\n"); + cxl_h_reset_adapter(adapter->guest->handle); + } + + transfer = 0; + return 0; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = device_open, + .unlocked_ioctl = device_ioctl, + .compat_ioctl = device_compat_ioctl, + .release = device_close, +}; + +void cxl_guest_remove_chardev(struct cxl *adapter) +{ + cdev_del(&adapter->guest->cdev); +} + +int cxl_guest_add_chardev(struct cxl *adapter) +{ + dev_t devt; + int rc; + + devt = MKDEV(MAJOR(cxl_get_dev()), CXL_CARD_MINOR(adapter)); + cdev_init(&adapter->guest->cdev, &fops); + if ((rc = cdev_add(&adapter->guest->cdev, devt, 1))) { + dev_err(&adapter->dev, + "Unable to add chardev on adapter (card%i): %i\n", + adapter->adapter_num, rc); + goto err; + } + adapter->dev.devt = devt; + sema_init(&sem, 1); +err: + return rc; +} diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c new file mode 100644 index 000000000000..8213372de2b7 --- /dev/null +++ b/drivers/misc/cxl/guest.c @@ -0,0 +1,1177 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/delay.h> + +#include "cxl.h" +#include "hcalls.h" +#include "trace.h" + +#define CXL_ERROR_DETECTED_EVENT 1 +#define CXL_SLOT_RESET_EVENT 2 +#define CXL_RESUME_EVENT 3 + +static void pci_error_handlers(struct cxl_afu *afu, + int bus_error_event, + pci_channel_state_t state) +{ + struct pci_dev *afu_dev; + + if (afu->phb == NULL) + return; + + list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { + if (!afu_dev->driver) + continue; + + switch (bus_error_event) { + case CXL_ERROR_DETECTED_EVENT: + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->error_detected) + afu_dev->driver->err_handler->error_detected(afu_dev, state); + break; + case CXL_SLOT_RESET_EVENT: + afu_dev->error_state = state; + + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->slot_reset) + afu_dev->driver->err_handler->slot_reset(afu_dev); + break; + case CXL_RESUME_EVENT: + if (afu_dev->driver->err_handler && + afu_dev->driver->err_handler->resume) + afu_dev->driver->err_handler->resume(afu_dev); + break; + } + } +} + +static irqreturn_t guest_handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, + u64 errstat) +{ + pr_devel("in %s\n", __func__); + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%.16llx\n", errstat); + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static ssize_t guest_collect_vpd(struct cxl *adapter, struct cxl_afu *afu, + void *buf, size_t len) +{ + unsigned int entries, mod; + unsigned long **vpd_buf = NULL; + struct sg_list *le; + int rc = 0, i, tocopy; + u64 out = 0; + + if (buf == NULL) + return -EINVAL; + + /* number of entries in the list */ + entries = len / SG_BUFFER_SIZE; + mod = len % SG_BUFFER_SIZE; + if (mod) + entries++; + + if (entries > SG_MAX_ENTRIES) { + entries = SG_MAX_ENTRIES; + len = SG_MAX_ENTRIES * SG_BUFFER_SIZE; + mod = 0; + } + + vpd_buf = kzalloc(entries * sizeof(unsigned long *), GFP_KERNEL); + if (!vpd_buf) + return -ENOMEM; + + le = (struct sg_list *)get_zeroed_page(GFP_KERNEL); + if (!le) { + rc = -ENOMEM; + goto err1; + } + + for (i = 0; i < entries; i++) { + vpd_buf[i] = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!vpd_buf[i]) { + rc = -ENOMEM; + goto err2; + } + le[i].phys_addr = cpu_to_be64(virt_to_phys(vpd_buf[i])); + le[i].len = cpu_to_be64(SG_BUFFER_SIZE); + if ((i == (entries - 1)) && mod) + le[i].len = cpu_to_be64(mod); + } + + if (adapter) + rc = cxl_h_collect_vpd_adapter(adapter->guest->handle, + virt_to_phys(le), entries, &out); + else + rc = cxl_h_collect_vpd(afu->guest->handle, 0, + virt_to_phys(le), entries, &out); + pr_devel("length of available (entries: %i), vpd: %#llx\n", + entries, out); + + if (!rc) { + /* + * hcall returns in 'out' the size of available VPDs. + * It fills the buffer with as much data as possible. + */ + if (out < len) + len = out; + rc = len; + if (out) { + for (i = 0; i < entries; i++) { + if (len < SG_BUFFER_SIZE) + tocopy = len; + else + tocopy = SG_BUFFER_SIZE; + memcpy(buf, vpd_buf[i], tocopy); + buf += tocopy; + len -= tocopy; + } + } + } +err2: + for (i = 0; i < entries; i++) { + if (vpd_buf[i]) + free_page((unsigned long) vpd_buf[i]); + } + free_page((unsigned long) le); +err1: + kfree(vpd_buf); + return rc; +} + +static int guest_get_irq_info(struct cxl_context *ctx, struct cxl_irq_info *info) +{ + return cxl_h_collect_int_info(ctx->afu->guest->handle, ctx->process_token, info); +} + +static irqreturn_t guest_psl_irq(int irq, void *data) +{ + struct cxl_context *ctx = data; + struct cxl_irq_info irq_info; + int rc; + + pr_devel("%d: received PSL interrupt %i\n", ctx->pe, irq); + rc = guest_get_irq_info(ctx, &irq_info); + if (rc) { + WARN(1, "Unable to get IRQ info: %i\n", rc); + return IRQ_HANDLED; + } + + rc = cxl_irq(irq, ctx, &irq_info); + return rc; +} + +static int afu_read_error_state(struct cxl_afu *afu, int *state_out) +{ + u64 state; + int rc = 0; + + rc = cxl_h_read_error_state(afu->guest->handle, &state); + if (!rc) { + WARN_ON(state != H_STATE_NORMAL && + state != H_STATE_DISABLE && + state != H_STATE_TEMP_UNAVAILABLE && + state != H_STATE_PERM_UNAVAILABLE); + *state_out = state & 0xffffffff; + } + return rc; +} + +static irqreturn_t guest_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + int rc; + u64 serr; + + WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); + rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr); + if (rc) { + dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc); + return IRQ_HANDLED; + } + dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + + rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr); + if (rc) + dev_crit(&afu->dev, "Couldn't ack slice error interrupt: %d\n", + rc); + + return IRQ_HANDLED; +} + + +static int irq_alloc_range(struct cxl *adapter, int len, int *irq) +{ + int i, n; + struct irq_avail *cur; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + n = bitmap_find_next_zero_area(cur->bitmap, cur->range, + 0, len, 0); + if (n < cur->range) { + bitmap_set(cur->bitmap, n, len); + *irq = cur->offset + n; + pr_devel("guest: allocate IRQs %#x->%#x\n", + *irq, *irq + len - 1); + + return 0; + } + } + return -ENOSPC; +} + +static int irq_free_range(struct cxl *adapter, int irq, int len) +{ + int i, n; + struct irq_avail *cur; + + if (len == 0) + return -ENOENT; + + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + if (irq >= cur->offset && + (irq + len) <= (cur->offset + cur->range)) { + n = irq - cur->offset; + bitmap_clear(cur->bitmap, n, len); + pr_devel("guest: release IRQs %#x->%#x\n", + irq, irq + len - 1); + return 0; + } + } + return -ENOENT; +} + +static int guest_reset(struct cxl *adapter) +{ + struct cxl_afu *afu = NULL; + int i, rc; + + pr_devel("Adapter reset request\n"); + for (i = 0; i < adapter->slices; i++) { + if ((afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + cxl_context_detach_all(afu); + } + } + + rc = cxl_h_reset_adapter(adapter->guest->handle); + for (i = 0; i < adapter->slices; i++) { + if (!rc && (afu = adapter->afu[i])) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + } + } + return rc; +} + +static int guest_alloc_one_irq(struct cxl *adapter) +{ + int irq; + + spin_lock(&adapter->guest->irq_alloc_lock); + if (irq_alloc_range(adapter, 1, &irq)) + irq = -ENOSPC; + spin_unlock(&adapter->guest->irq_alloc_lock); + return irq; +} + +static void guest_release_one_irq(struct cxl *adapter, int irq) +{ + spin_lock(&adapter->guest->irq_alloc_lock); + irq_free_range(adapter, irq, 1); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) +{ + int i, try, irq; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + if (irq_alloc_range(adapter, try, &irq) == 0) + break; + try /= 2; + } + if (!try) + goto error; + irqs->offset[i] = irq; + irqs->range[i] = try; + num -= try; + } + if (num) + goto error; + spin_unlock(&adapter->guest->irq_alloc_lock); + return 0; + +error: + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); + return -ENOSPC; +} + +static void guest_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) +{ + int i; + + spin_lock(&adapter->guest->irq_alloc_lock); + for (i = 0; i < CXL_IRQ_RANGES; i++) + irq_free_range(adapter, irqs->offset[i], irqs->range[i]); + spin_unlock(&adapter->guest->irq_alloc_lock); +} + +static int guest_register_serr_irq(struct cxl_afu *afu) +{ + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + + if (!(afu->serr_virq = cxl_map_irq(afu->adapter, afu->serr_hwirq, + guest_slice_irq_err, afu, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return -ENOMEM; + } + + return 0; +} + +static void guest_release_serr_irq(struct cxl_afu *afu) +{ + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); +} + +static int guest_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +{ + return cxl_h_control_faults(ctx->afu->guest->handle, ctx->process_token, + tfc >> 32, (psl_reset_mask != 0)); +} + +static void disable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Disabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + disable_irq(virq); + } + } +} + +static void enable_afu_irqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + unsigned int virq; + int r, i; + + pr_devel("Enabling AFU(%d) interrupts\n", ctx->afu->slice); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + hwirq = ctx->irqs.offset[r]; + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + virq = irq_find_mapping(NULL, hwirq); + enable_irq(virq); + } + } +} + +static int _guest_afu_cr_readXX(int sz, struct cxl_afu *afu, int cr_idx, + u64 offset, u64 *val) +{ + unsigned long cr; + char c; + int rc = 0; + + if (afu->crs_len < sz) + return -ENOENT; + + if (unlikely(offset >= afu->crs_len)) + return -ERANGE; + + cr = get_zeroed_page(GFP_KERNEL); + if (!cr) + return -ENOMEM; + + rc = cxl_h_get_config(afu->guest->handle, cr_idx, offset, + virt_to_phys((void *)cr), sz); + if (rc) + goto err; + + switch (sz) { + case 1: + c = *((char *) cr); + *val = c; + break; + case 2: + *val = in_le16((u16 *)cr); + break; + case 4: + *val = in_le32((unsigned *)cr); + break; + case 8: + *val = in_le64((u64 *)cr); + break; + default: + WARN_ON(1); + } +err: + free_page(cr); + return rc; +} + +static int guest_afu_cr_read32(struct cxl_afu *afu, int cr_idx, u64 offset, + u32 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(4, afu, cr_idx, offset, &val); + if (!rc) + *out = (u32) val; + return rc; +} + +static int guest_afu_cr_read16(struct cxl_afu *afu, int cr_idx, u64 offset, + u16 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(2, afu, cr_idx, offset, &val); + if (!rc) + *out = (u16) val; + return rc; +} + +static int guest_afu_cr_read8(struct cxl_afu *afu, int cr_idx, u64 offset, + u8 *out) +{ + int rc; + u64 val; + + rc = _guest_afu_cr_readXX(1, afu, cr_idx, offset, &val); + if (!rc) + *out = (u8) val; + return rc; +} + +static int guest_afu_cr_read64(struct cxl_afu *afu, int cr_idx, u64 offset, + u64 *out) +{ + return _guest_afu_cr_readXX(8, afu, cr_idx, offset, out); +} + +static int guest_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int guest_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + /* config record is not writable from guest */ + return -EPERM; +} + +static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) +{ + struct cxl_process_element_hcall *elem; + struct cxl *adapter = ctx->afu->adapter; + const struct cred *cred; + u32 pid, idx; + int rc, r, i; + u64 mmio_addr, mmio_size; + __be64 flags = 0; + + /* Must be 8 byte aligned and cannot cross a 4096 byte boundary */ + if (!(elem = (struct cxl_process_element_hcall *) + get_zeroed_page(GFP_KERNEL))) + return -ENOMEM; + + elem->version = cpu_to_be64(CXL_PROCESS_ELEMENT_VERSION); + if (ctx->kernel) { + pid = 0; + flags |= CXL_PE_TRANSLATION_ENABLED; + flags |= CXL_PE_PRIVILEGED_PROCESS; + if (mfmsr() & MSR_SF) + flags |= CXL_PE_64_BIT; + } else { + pid = current->pid; + flags |= CXL_PE_PROBLEM_STATE; + flags |= CXL_PE_TRANSLATION_ENABLED; + if (!test_tsk_thread_flag(current, TIF_32BIT)) + flags |= CXL_PE_64_BIT; + cred = get_current_cred(); + if (uid_eq(cred->euid, GLOBAL_ROOT_UID)) + flags |= CXL_PE_PRIVILEGED_PROCESS; + put_cred(cred); + } + elem->flags = cpu_to_be64(flags); + elem->common.tid = cpu_to_be32(0); /* Unused */ + elem->common.pid = cpu_to_be32(pid); + elem->common.csrp = cpu_to_be64(0); /* disable */ + elem->common.aurp0 = cpu_to_be64(0); /* disable */ + elem->common.aurp1 = cpu_to_be64(0); /* disable */ + + cxl_prefault(ctx, wed); + + elem->common.sstp0 = cpu_to_be64(ctx->sstp0); + elem->common.sstp1 = cpu_to_be64(ctx->sstp1); + for (r = 0; r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; i++) { + if (r == 0 && i == 0) { + elem->pslVirtualIsn = cpu_to_be32(ctx->irqs.offset[0]); + } else { + idx = ctx->irqs.offset[r] + i - adapter->guest->irq_base_offset; + elem->applicationVirtualIsnBitmap[idx / 8] |= 0x80 >> (idx % 8); + } + } + } + elem->common.amr = cpu_to_be64(amr); + elem->common.wed = cpu_to_be64(wed); + + disable_afu_irqs(ctx); + + rc = cxl_h_attach_process(ctx->afu->guest->handle, elem, + &ctx->process_token, &mmio_addr, &mmio_size); + if (rc == H_SUCCESS) { + if (ctx->master || !ctx->afu->pp_psa) { + ctx->psn_phys = ctx->afu->psn_phys; + ctx->psn_size = ctx->afu->adapter->ps_size; + } else { + ctx->psn_phys = mmio_addr; + ctx->psn_size = mmio_size; + } + if (ctx->afu->pp_psa && mmio_size && + ctx->afu->pp_size == 0) { + /* + * There's no property in the device tree to read the + * pp_size. We only find out at the 1st attach. + * Compared to bare-metal, it is too late and we + * should really lock here. However, on powerVM, + * pp_size is really only used to display in /sys. + * Being discussed with pHyp for their next release. + */ + ctx->afu->pp_size = mmio_size; + } + /* from PAPR: process element is bytes 4-7 of process token */ + ctx->external_pe = ctx->process_token & 0xFFFFFFFF; + pr_devel("CXL pe=%i is known as %i for pHyp, mmio_size=%#llx", + ctx->pe, ctx->external_pe, ctx->psn_size); + ctx->pe_inserted = true; + enable_afu_irqs(ctx); + } + + free_page((u64)elem); + return rc; +} + +static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +{ + pr_devel("in %s\n", __func__); + + ctx->kernel = kernel; + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return attach_afu_directed(ctx, wed, amr); + + /* dedicated mode not supported on FW840 */ + + return -EINVAL; +} + +static int detach_afu_directed(struct cxl_context *ctx) +{ + if (!ctx->pe_inserted) + return 0; + if (cxl_h_detach_process(ctx->afu->guest->handle, ctx->process_token)) + return -1; + return 0; +} + +static int guest_detach_process(struct cxl_context *ctx) +{ + pr_devel("in %s\n", __func__); + trace_cxl_detach(ctx); + + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) + return -EIO; + + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return detach_afu_directed(ctx); + + return -EINVAL; +} + +static void guest_release_afu(struct device *dev) +{ + struct cxl_afu *afu = to_cxl_afu(dev); + + pr_devel("%s\n", __func__); + + idr_destroy(&afu->contexts_idr); + + kfree(afu->guest); + kfree(afu); +} + +ssize_t cxl_guest_read_afu_vpd(struct cxl_afu *afu, void *buf, size_t len) +{ + return guest_collect_vpd(NULL, afu, buf, len); +} + +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +static ssize_t guest_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) +{ + void *tbuf = NULL; + int rc = 0; + + tbuf = (void *) get_zeroed_page(GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + rc = cxl_h_get_afu_err(afu->guest->handle, + off & 0x7, + virt_to_phys(tbuf), + count); + if (rc) + goto err; + + if (count > ERR_BUFF_MAX_COPY_SIZE) + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + memcpy(buf, tbuf, count); +err: + free_page((u64)tbuf); + + return rc; +} + +static int guest_afu_check_and_enable(struct cxl_afu *afu) +{ + return 0; +} + +static bool guest_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + switch (type) { + case CXL_ADAPTER_ATTRS: + if ((strcmp(attr_name, "base_image") == 0) || + (strcmp(attr_name, "load_image_on_perst") == 0) || + (strcmp(attr_name, "perst_reloads_same_image") == 0) || + (strcmp(attr_name, "image_loaded") == 0)) + return false; + break; + case CXL_AFU_MASTER_ATTRS: + if ((strcmp(attr_name, "pp_mmio_off") == 0)) + return false; + break; + case CXL_AFU_ATTRS: + break; + default: + break; + } + + return true; +} + +static int activate_afu_directed(struct cxl_afu *afu) +{ + int rc; + + dev_info(&afu->dev, "Activating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = CXL_MODE_DIRECTED; + + afu->num_procs = afu->max_procs_virtualised; + + if ((rc = cxl_chardev_m_afu_add(afu))) + return rc; + + if ((rc = cxl_sysfs_afu_m_add(afu))) + goto err; + + if ((rc = cxl_chardev_s_afu_add(afu))) + goto err1; + + return 0; +err1: + cxl_sysfs_afu_m_remove(afu); +err: + cxl_chardev_afu_remove(afu); + return rc; +} + +static int guest_afu_activate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return activate_afu_directed(afu); + + if (mode == CXL_MODE_DEDICATED) + dev_err(&afu->dev, "Dedicated mode not supported\n"); + + return -EINVAL; +} + +static int deactivate_afu_directed(struct cxl_afu *afu) +{ + dev_info(&afu->dev, "Deactivating AFU(%d) directed mode\n", afu->slice); + + afu->current_mode = 0; + afu->num_procs = 0; + + cxl_sysfs_afu_m_remove(afu); + cxl_chardev_afu_remove(afu); + + cxl_ops->afu_reset(afu); + + return 0; +} + +static int guest_afu_deactivate_mode(struct cxl_afu *afu, int mode) +{ + if (!mode) + return 0; + if (!(mode & afu->modes_supported)) + return -EINVAL; + + if (mode == CXL_MODE_DIRECTED) + return deactivate_afu_directed(afu); + return 0; +} + +static int guest_afu_reset(struct cxl_afu *afu) +{ + pr_devel("AFU(%d) reset request\n", afu->slice); + return cxl_h_reset_afu(afu->guest->handle); +} + +static int guest_map_slice_regs(struct cxl_afu *afu) +{ + if (!(afu->p2n_mmio = ioremap(afu->guest->p2n_phys, afu->guest->p2n_size))) { + dev_err(&afu->dev, "Error mapping AFU(%d) MMIO regions\n", + afu->slice); + return -ENOMEM; + } + return 0; +} + +static void guest_unmap_slice_regs(struct cxl_afu *afu) +{ + if (afu->p2n_mmio) + iounmap(afu->p2n_mmio); +} + +static int afu_update_state(struct cxl_afu *afu) +{ + int rc, cur_state; + + rc = afu_read_error_state(afu, &cur_state); + if (rc) + return rc; + + if (afu->guest->previous_state == cur_state) + return 0; + + pr_devel("AFU(%d) update state to %#x\n", afu->slice, cur_state); + + switch (cur_state) { + case H_STATE_NORMAL: + afu->guest->previous_state = cur_state; + rc = 1; + break; + + case H_STATE_DISABLE: + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_frozen); + + cxl_context_detach_all(afu); + if ((rc = cxl_ops->afu_reset(afu))) + pr_devel("reset hcall failed %d\n", rc); + + rc = afu_read_error_state(afu, &cur_state); + if (!rc && cur_state == H_STATE_NORMAL) { + pci_error_handlers(afu, CXL_SLOT_RESET_EVENT, + pci_channel_io_normal); + pci_error_handlers(afu, CXL_RESUME_EVENT, 0); + rc = 1; + } + afu->guest->previous_state = 0; + break; + + case H_STATE_TEMP_UNAVAILABLE: + afu->guest->previous_state = cur_state; + break; + + case H_STATE_PERM_UNAVAILABLE: + dev_err(&afu->dev, "AFU is in permanent error state\n"); + pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT, + pci_channel_io_perm_failure); + afu->guest->previous_state = cur_state; + break; + + default: + pr_err("Unexpected AFU(%d) error state: %#x\n", + afu->slice, cur_state); + return -EINVAL; + } + + return rc; +} + +static int afu_do_recovery(struct cxl_afu *afu) +{ + int rc; + + /* many threads can arrive here, in case of detach_all for example. + * Only one needs to drive the recovery + */ + if (mutex_trylock(&afu->guest->recovery_lock)) { + rc = afu_update_state(afu); + mutex_unlock(&afu->guest->recovery_lock); + return rc; + } + return 0; +} + +static bool guest_link_ok(struct cxl *cxl, struct cxl_afu *afu) +{ + int state; + + if (afu) { + if (afu_read_error_state(afu, &state) || + state != H_STATE_NORMAL) { + if (afu_do_recovery(afu) > 0) { + /* check again in case we've just fixed it */ + if (!afu_read_error_state(afu, &state) && + state == H_STATE_NORMAL) + return true; + } + return false; + } + } + + return true; +} + +static int afu_properties_look_ok(struct cxl_afu *afu) +{ + if (afu->pp_irqs < 0) { + dev_err(&afu->dev, "Unexpected per-process minimum interrupt value\n"); + return -EINVAL; + } + + if (afu->max_procs_virtualised < 1) { + dev_err(&afu->dev, "Unexpected max number of processes virtualised value\n"); + return -EINVAL; + } + + if (afu->crs_len < 0) { + dev_err(&afu->dev, "Unexpected configuration record size value\n"); + return -EINVAL; + } + + return 0; +} + +int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_np) +{ + struct cxl_afu *afu; + bool free = true; + int rc; + + pr_devel("in %s - AFU(%d)\n", __func__, slice); + if (!(afu = cxl_alloc_afu(adapter, slice))) + return -ENOMEM; + + if (!(afu->guest = kzalloc(sizeof(struct cxl_afu_guest), GFP_KERNEL))) { + kfree(afu); + return -ENOMEM; + } + + mutex_init(&afu->guest->recovery_lock); + + if ((rc = dev_set_name(&afu->dev, "afu%i.%i", + adapter->adapter_num, + slice))) + goto err1; + + adapter->slices++; + + if ((rc = cxl_of_read_afu_handle(afu, afu_np))) + goto err1; + + if ((rc = cxl_ops->afu_reset(afu))) + goto err1; + + if ((rc = cxl_of_read_afu_properties(afu, afu_np))) + goto err1; + + if ((rc = afu_properties_look_ok(afu))) + goto err1; + + if ((rc = guest_map_slice_regs(afu))) + goto err1; + + if ((rc = guest_register_serr_irq(afu))) + goto err2; + + /* + * After we call this function we must not free the afu directly, even + * if it returns an error! + */ + if ((rc = cxl_register_afu(afu))) + goto err_put1; + + if ((rc = cxl_sysfs_afu_add(afu))) + goto err_put1; + + /* + * pHyp doesn't expose the programming models supported by the + * AFU. pHyp currently only supports directed mode. If it adds + * dedicated mode later, this version of cxl has no way to + * detect it. So we'll initialize the driver, but the first + * attach will fail. + * Being discussed with pHyp to do better (likely new property) + */ + if (afu->max_procs_virtualised == 1) + afu->modes_supported = CXL_MODE_DEDICATED; + else + afu->modes_supported = CXL_MODE_DIRECTED; + + if ((rc = cxl_afu_select_best_mode(afu))) + goto err_put2; + + adapter->afu[afu->slice] = afu; + + afu->enabled = true; + + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + + return 0; + +err_put2: + cxl_sysfs_afu_remove(afu); +err_put1: + device_unregister(&afu->dev); + free = false; + guest_release_serr_irq(afu); +err2: + guest_unmap_slice_regs(afu); +err1: + if (free) { + kfree(afu->guest); + kfree(afu); + } + return rc; +} + +void cxl_guest_remove_afu(struct cxl_afu *afu) +{ + pr_devel("in %s - AFU(%d)\n", __func__, afu->slice); + + if (!afu) + return; + + cxl_pci_vphb_remove(afu); + cxl_sysfs_afu_remove(afu); + + spin_lock(&afu->adapter->afu_list_lock); + afu->adapter->afu[afu->slice] = NULL; + spin_unlock(&afu->adapter->afu_list_lock); + + cxl_context_detach_all(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + guest_release_serr_irq(afu); + guest_unmap_slice_regs(afu); + + device_unregister(&afu->dev); +} + +static void free_adapter(struct cxl *adapter) +{ + struct irq_avail *cur; + int i; + + if (adapter->guest->irq_avail) { + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + } + kfree(adapter->guest->status); + cxl_remove_adapter_nr(adapter); + kfree(adapter->guest); + kfree(adapter); +} + +static int properties_look_ok(struct cxl *adapter) +{ + /* The absence of this property means that the operational + * status is unknown or okay + */ + if (strlen(adapter->guest->status) && + strcmp(adapter->guest->status, "okay")) { + pr_err("ABORTING:Bad operational status of the device\n"); + return -EINVAL; + } + + return 0; +} + +ssize_t cxl_guest_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return guest_collect_vpd(adapter, NULL, buf, len); +} + +void cxl_guest_remove_adapter(struct cxl *adapter) +{ + pr_devel("in %s\n", __func__); + + cxl_sysfs_adapter_remove(adapter); + + cxl_guest_remove_chardev(adapter); + device_unregister(&adapter->dev); +} + +static void release_adapter(struct device *dev) +{ + free_adapter(to_cxl_adapter(dev)); +} + +struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_device *pdev) +{ + struct cxl *adapter; + bool free = true; + int rc; + + if (!(adapter = cxl_alloc_adapter())) + return ERR_PTR(-ENOMEM); + + if (!(adapter->guest = kzalloc(sizeof(struct cxl_guest), GFP_KERNEL))) { + free_adapter(adapter); + return ERR_PTR(-ENOMEM); + } + + adapter->slices = 0; + adapter->guest->pdev = pdev; + adapter->dev.parent = &pdev->dev; + adapter->dev.release = release_adapter; + dev_set_drvdata(&pdev->dev, adapter); + + if ((rc = cxl_of_read_adapter_handle(adapter, np))) + goto err1; + + if ((rc = cxl_of_read_adapter_properties(adapter, np))) + goto err1; + + if ((rc = properties_look_ok(adapter))) + goto err1; + + if ((rc = cxl_guest_add_chardev(adapter))) + goto err1; + + /* + * After we call this function we must not free the adapter directly, + * even if it returns an error! + */ + if ((rc = cxl_register_adapter(adapter))) + goto err_put1; + + if ((rc = cxl_sysfs_adapter_add(adapter))) + goto err_put1; + + return adapter; + +err_put1: + device_unregister(&adapter->dev); + free = false; + cxl_guest_remove_chardev(adapter); +err1: + if (free) + free_adapter(adapter); + return ERR_PTR(rc); +} + +void cxl_guest_reload_module(struct cxl *adapter) +{ + struct platform_device *pdev; + + pdev = adapter->guest->pdev; + cxl_guest_remove_adapter(adapter); + + cxl_of_probe(pdev); +} + +const struct cxl_backend_ops cxl_guest_ops = { + .module = THIS_MODULE, + .adapter_reset = guest_reset, + .alloc_one_irq = guest_alloc_one_irq, + .release_one_irq = guest_release_one_irq, + .alloc_irq_ranges = guest_alloc_irq_ranges, + .release_irq_ranges = guest_release_irq_ranges, + .setup_irq = NULL, + .handle_psl_slice_error = guest_handle_psl_slice_error, + .psl_interrupt = guest_psl_irq, + .ack_irq = guest_ack_irq, + .attach_process = guest_attach_process, + .detach_process = guest_detach_process, + .support_attributes = guest_support_attributes, + .link_ok = guest_link_ok, + .release_afu = guest_release_afu, + .afu_read_err_buffer = guest_afu_read_err_buffer, + .afu_check_and_enable = guest_afu_check_and_enable, + .afu_activate_mode = guest_afu_activate_mode, + .afu_deactivate_mode = guest_afu_deactivate_mode, + .afu_reset = guest_afu_reset, + .afu_cr_read8 = guest_afu_cr_read8, + .afu_cr_read16 = guest_afu_cr_read16, + .afu_cr_read32 = guest_afu_cr_read32, + .afu_cr_read64 = guest_afu_cr_read64, + .afu_cr_write8 = guest_afu_cr_write8, + .afu_cr_write16 = guest_afu_cr_write16, + .afu_cr_write32 = guest_afu_cr_write32, + .read_adapter_vpd = cxl_guest_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/hcalls.c b/drivers/misc/cxl/hcalls.c new file mode 100644 index 000000000000..d6d11f4056d7 --- /dev/null +++ b/drivers/misc/cxl/hcalls.c @@ -0,0 +1,647 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +#include <linux/compiler.h> +#include <linux/types.h> +#include <linux/delay.h> +#include <asm/byteorder.h> +#include "hcalls.h" +#include "trace.h" + +#define CXL_HCALL_TIMEOUT 60000 +#define CXL_HCALL_TIMEOUT_DOWNLOAD 120000 + +#define H_ATTACH_CA_PROCESS 0x344 +#define H_CONTROL_CA_FUNCTION 0x348 +#define H_DETACH_CA_PROCESS 0x34C +#define H_COLLECT_CA_INT_INFO 0x350 +#define H_CONTROL_CA_FAULTS 0x354 +#define H_DOWNLOAD_CA_FUNCTION 0x35C +#define H_DOWNLOAD_CA_FACILITY 0x364 +#define H_CONTROL_CA_FACILITY 0x368 + +#define H_CONTROL_CA_FUNCTION_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS 2 /* suspend a process from being executed */ +#define H_CONTROL_CA_FUNCTION_RESUME_PROCESS 3 /* resume a process to be executed */ +#define H_CONTROL_CA_FUNCTION_READ_ERR_STATE 4 /* read the error state */ +#define H_CONTROL_CA_FUNCTION_GET_AFU_ERR 5 /* collect the AFU error buffer */ +#define H_CONTROL_CA_FUNCTION_GET_CONFIG 6 /* collect configuration record */ +#define H_CONTROL_CA_FUNCTION_GET_DOWNLOAD_STATE 7 /* query to return download status */ +#define H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS 8 /* terminate the process before completion */ +#define H_CONTROL_CA_FUNCTION_COLLECT_VPD 9 /* collect VPD */ +#define H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT 11 /* read the function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT 12 /* acknowledge function-wide error data based on an interrupt */ +#define H_CONTROL_CA_FUNCTION_GET_ERROR_LOG 13 /* retrieve the Platform Log ID (PLID) of an error log */ + +#define H_CONTROL_CA_FAULTS_RESPOND_PSL 1 +#define H_CONTROL_CA_FAULTS_RESPOND_AFU 2 + +#define H_CONTROL_CA_FACILITY_RESET 1 /* perform a reset */ +#define H_CONTROL_CA_FACILITY_COLLECT_VPD 2 /* collect VPD */ + +#define H_DOWNLOAD_CA_FACILITY_DOWNLOAD 1 /* download adapter image */ +#define H_DOWNLOAD_CA_FACILITY_VALIDATE 2 /* validate adapter image */ + + +#define _CXL_LOOP_HCALL(call, rc, retbuf, fn, ...) \ + { \ + unsigned int delay, total_delay = 0; \ + u64 token = 0; \ + \ + memset(retbuf, 0, sizeof(retbuf)); \ + while (1) { \ + rc = call(fn, retbuf, __VA_ARGS__, token); \ + token = retbuf[0]; \ + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) \ + break; \ + \ + if (rc == H_BUSY) \ + delay = 10; \ + else \ + delay = get_longbusy_msecs(rc); \ + \ + total_delay += delay; \ + if (total_delay > CXL_HCALL_TIMEOUT) { \ + WARN(1, "Warning: Giving up waiting for CXL hcall " \ + "%#x after %u msec\n", fn, total_delay); \ + rc = H_BUSY; \ + break; \ + } \ + msleep(delay); \ + } \ + } +#define CXL_H_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall, __VA_ARGS__) +#define CXL_H9_WAIT_UNTIL_DONE(...) _CXL_LOOP_HCALL(plpar_hcall9, __VA_ARGS__) + +#define _PRINT_MSG(rc, format, ...) \ + { \ + if ((rc != H_SUCCESS) && (rc != H_CONTINUE)) \ + pr_err(format, __VA_ARGS__); \ + else \ + pr_devel(format, __VA_ARGS__); \ + } \ + + +static char *afu_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "SUSPEND_PROCESS", /* 2 */ + "RESUME_PROCESS", /* 3 */ + "READ_ERR_STATE", /* 4 */ + "GET_AFU_ERR", /* 5 */ + "GET_CONFIG", /* 6 */ + "GET_DOWNLOAD_STATE", /* 7 */ + "TERMINATE_PROCESS", /* 8 */ + "COLLECT_VPD", /* 9 */ + "UNKNOWN_OP", /* 10 undefined */ + "GET_FUNCTION_ERR_INT", /* 11 */ + "ACK_FUNCTION_ERR_INT", /* 12 */ + "GET_ERROR_LOG", /* 13 */ +}; + +static char *control_adapter_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "RESET", /* 1 */ + "COLLECT_VPD", /* 2 */ +}; + +static char *download_op_names[] = { + "UNKNOWN_OP", /* 0 undefined */ + "DOWNLOAD", /* 1 */ + "VALIDATE", /* 2 */ +}; + +static char *op_str(unsigned int op, char *name_array[], int array_len) +{ + if (op >= array_len) + return "UNKNOWN_OP"; + return name_array[op]; +} + +#define OP_STR(op, name_array) op_str(op, name_array, ARRAY_SIZE(name_array)) + +#define OP_STR_AFU(op) OP_STR(op, afu_op_names) +#define OP_STR_CONTROL_ADAPTER(op) OP_STR(op, control_adapter_op_names) +#define OP_STR_DOWNLOAD_ADAPTER(op) OP_STR(op, download_op_names) + + +long cxl_h_attach_process(u64 unit_address, + struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_ATTACH_CA_PROCESS, unit_address, virt_to_phys(element)); + _PRINT_MSG(rc, "cxl_h_attach_process(%#.16llx, %#.16lx): %li\n", + unit_address, virt_to_phys(element), rc); + trace_cxl_hcall_attach(unit_address, virt_to_phys(element), retbuf[0], retbuf[1], retbuf[2], rc); + + pr_devel("token: 0x%.8lx mmio_addr: 0x%lx mmio_size: 0x%lx\nProcess Element Structure:\n", + retbuf[0], retbuf[1], retbuf[2]); + cxl_dump_debug_buffer(element, sizeof(*element)); + + switch (rc) { + case H_SUCCESS: /* The process info is attached to the coherent platform function */ + *process_token = retbuf[0]; + if (mmio_addr) + *mmio_addr = retbuf[1]; + if (mmio_size) + *mmio_size = retbuf[2]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The coherent platform function does not have enough additional resource to attach the process */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + CXL_H_WAIT_UNTIL_DONE(rc, retbuf, H_DETACH_CA_PROCESS, unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_detach_process(%#.16llx, 0x%.8llx): %li\n", unit_address, process_token, rc); + trace_cxl_hcall_detach(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The process was detached from the coherent platform function */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the detach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_function - This H_CONTROL_CA_FUNCTION hypervisor call allows + * the partition to manipulate or query + * certain coherent platform function behaviors. + */ +static long cxl_h_control_function(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FUNCTION, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_function(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_function(unit_address, OP_STR_AFU(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform function */ + if ((op == H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT || + op == H_CONTROL_CA_FUNCTION_READ_ERR_STATE || + op == H_CONTROL_CA_FUNCTION_COLLECT_VPD)) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform function is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESET, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_SUSPEND_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_RESUME_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_read_error_state - Checks the error state of the coherent + * platform function. + * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_READ_ERR_STATE, + 0, 0, 0, 0, + state); +} + +/** + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_AFU_ERR, + offset, buf_address, len, 0, + NULL); +} + +/** + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_CONFIG, + cr_num, offset, buf_address, len, + NULL); +} + +/** + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_TERMINATE_PROCESS, + process_token, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). + * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_COLLECT_VPD, + record, list_address, num, 0, + out); +} + +/** + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_FUNCTION_ERR_INT, + 0, 0, 0, 0, reg); +} + +/** + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_ACK_FUNCTION_ERR_INT, + value, 0, 0, 0, + NULL); +} + +/** + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value) +{ + return cxl_h_control_function(unit_address, + H_CONTROL_CA_FUNCTION_GET_ERROR_LOG, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. + */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info) +{ + long rc; + + BUG_ON(sizeof(*info) != sizeof(unsigned long[PLPAR_HCALL9_BUFSIZE])); + + rc = plpar_hcall9(H_COLLECT_CA_INT_INFO, (unsigned long *) info, + unit_address, process_token); + _PRINT_MSG(rc, "cxl_h_collect_int_info(%#.16llx, 0x%llx): %li\n", + unit_address, process_token, rc); + trace_cxl_hcall_collect_int_info(unit_address, process_token, rc); + + switch (rc) { + case H_SUCCESS: /* The interrupt info is returned in return registers. */ + pr_devel("dsisr:%#llx, dar:%#llx, dsr:%#llx, pid:%u, tid:%u, afu_err:%#llx, errstat:%#llx\n", + info->dsisr, info->dar, info->dsr, info->pid, + info->tid, info->afu_err, info->errstat); + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall. */ + case H_HARDWARE: /* A hardware event prevented the collection of the interrupt info.*/ + case H_STATE: /* The coherent platform function is not in a valid state to collect interrupt info. */ + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + memset(retbuf, 0, sizeof(retbuf)); + + rc = plpar_hcall(H_CONTROL_CA_FAULTS, retbuf, unit_address, + H_CONTROL_CA_FAULTS_RESPOND_PSL, process_token, + control_mask, reset_mask); + _PRINT_MSG(rc, "cxl_h_control_faults(%#.16llx, 0x%llx, %#llx, %#llx): %li (%#lx)\n", + unit_address, process_token, control_mask, reset_mask, + rc, retbuf[0]); + trace_cxl_hcall_control_faults(unit_address, process_token, + control_mask, reset_mask, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* Faults were successfully controlled for the function. */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + return -EINVAL; + case H_HARDWARE: /* A hardware event prevented the control of faults. */ + case H_STATE: /* The function was in an invalid state. */ + case H_AUTHORITY: /* The partition does not have authority to perform this hcall; the coherent platform facilities may need to be licensed. */ + return -EBUSY; + case H_FUNCTION: /* The function is not supported */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + return -EINVAL; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_control_facility - This H_CONTROL_CA_FACILITY hypervisor call + * allows the partition to manipulate or query + * certain coherent platform facility behaviors. + */ +static long cxl_h_control_facility(u64 unit_address, u64 op, + u64 p1, u64 p2, u64 p3, u64 p4, u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + long rc; + + CXL_H9_WAIT_UNTIL_DONE(rc, retbuf, H_CONTROL_CA_FACILITY, unit_address, op, p1, p2, p3, p4); + _PRINT_MSG(rc, "cxl_h_control_facility(%#.16llx, %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li\n", + unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + trace_cxl_hcall_control_facility(unit_address, OP_STR_CONTROL_ADAPTER(op), p1, p2, p3, p4, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + if (op == H_CONTROL_CA_FACILITY_COLLECT_VPD) + *out = retbuf[0]; + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied. */ + case H_FUNCTION: /* The function is not supported. */ + case H_NOT_FOUND: /* The operation supplied was not valid */ + case H_NOT_AVAILABLE: /* The operation cannot be performed because the AFU has not been downloaded */ + case H_SG_LIST: /* An block list entry was invalid */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_RESET, + 0, 0, 0, 0, + NULL); +} + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out) +{ + return cxl_h_control_facility(unit_address, + H_CONTROL_CA_FACILITY_COLLECT_VPD, + list_address, num, 0, 0, + out); +} + +/** + * cxl_h_download_facility - This H_DOWNLOAD_CA_FACILITY + * hypervisor call provide platform support for + * downloading a base adapter image to the coherent + * platform facility, and for validating the entire + * image after the download. + * Parameters + * op: operation to perform to the coherent platform function + * Download: operation = 1, the base image in the coherent platform + * facility is first erased, and then + * programmed using the image supplied + * in the scatter/gather list. + * Validate: operation = 2, the base image in the coherent platform + * facility is compared with the image + * supplied in the scatter/gather list. + * list_address: 4K naturally aligned real buffer containing + * scatter/gather list entries. + * num: number of block list entries in the scatter/gather list. + */ +static long cxl_h_download_facility(u64 unit_address, u64 op, + u64 list_address, u64 num, + u64 *out) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + unsigned int delay, total_delay = 0; + u64 token = 0; + long rc; + + if (*out != 0) + token = *out; + + memset(retbuf, 0, sizeof(retbuf)); + while (1) { + rc = plpar_hcall(H_DOWNLOAD_CA_FACILITY, retbuf, + unit_address, op, list_address, num, + token); + token = retbuf[0]; + if (rc != H_BUSY && !H_IS_LONG_BUSY(rc)) + break; + + if (rc != H_BUSY) { + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > CXL_HCALL_TIMEOUT_DOWNLOAD) { + WARN(1, "Warning: Giving up waiting for CXL hcall " + "%#x after %u msec\n", + H_DOWNLOAD_CA_FACILITY, total_delay); + rc = H_BUSY; + break; + } + msleep(delay); + } + } + _PRINT_MSG(rc, "cxl_h_download_facility(%#.16llx, %s(%#llx, %#llx), %#lx): %li\n", + unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + trace_cxl_hcall_download_facility(unit_address, OP_STR_DOWNLOAD_ADAPTER(op), list_address, num, retbuf[0], rc); + + switch (rc) { + case H_SUCCESS: /* The operation is completed for the coherent platform facility */ + return 0; + case H_PARAMETER: /* An incorrect parameter was supplied */ + case H_FUNCTION: /* The function is not supported. */ + case H_SG_LIST: /* An block list entry was invalid */ + case H_BAD_DATA: /* Image verification failed */ + return -EINVAL; + case H_AUTHORITY: /* The partition does not have authority to perform this hcall */ + case H_RESOURCE: /* The function has page table mappings for MMIO */ + case H_HARDWARE: /* A hardware event prevented the attach operation */ + case H_STATE: /* The coherent platform facility is not in a valid state */ + case H_BUSY: + return -EBUSY; + case H_CONTINUE: + *out = retbuf[0]; + return 1; /* More data is needed for the complete image */ + default: + WARN(1, "Unexpected return code: %lx", rc); + return -EINVAL; + } +} + +/** + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_DOWNLOAD, + list_address, num, out); +} + +/** + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out) +{ + return cxl_h_download_facility(unit_address, + H_DOWNLOAD_CA_FACILITY_VALIDATE, + list_address, num, out); +} diff --git a/drivers/misc/cxl/hcalls.h b/drivers/misc/cxl/hcalls.h new file mode 100644 index 000000000000..3e25522a5df6 --- /dev/null +++ b/drivers/misc/cxl/hcalls.h @@ -0,0 +1,204 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _HCALLS_H +#define _HCALLS_H + +#include <linux/types.h> +#include <asm/byteorder.h> +#include <asm/hvcall.h> +#include "cxl.h" + +#define SG_BUFFER_SIZE 4096 +#define SG_MAX_ENTRIES 256 + +struct sg_list { + u64 phys_addr; + u64 len; +}; + +/* + * This is straight out of PAPR, but replacing some of the compound fields with + * a single field, where they were identical to the register layout. + * + * The 'flags' parameter regroups the various bit-fields + */ +#define CXL_PE_CSRP_VALID (1ULL << 63) +#define CXL_PE_PROBLEM_STATE (1ULL << 62) +#define CXL_PE_SECONDARY_SEGMENT_TBL_SRCH (1ULL << 61) +#define CXL_PE_TAGS_ACTIVE (1ULL << 60) +#define CXL_PE_USER_STATE (1ULL << 59) +#define CXL_PE_TRANSLATION_ENABLED (1ULL << 58) +#define CXL_PE_64_BIT (1ULL << 57) +#define CXL_PE_PRIVILEGED_PROCESS (1ULL << 56) + +#define CXL_PROCESS_ELEMENT_VERSION 1 +struct cxl_process_element_hcall { + __be64 version; + __be64 flags; + u8 reserved0[12]; + __be32 pslVirtualIsn; + u8 applicationVirtualIsnBitmap[256]; + u8 reserved1[144]; + struct cxl_process_element_common common; + u8 reserved4[12]; +} __packed; + +#define H_STATE_NORMAL 1 +#define H_STATE_DISABLE 2 +#define H_STATE_TEMP_UNAVAILABLE 3 +#define H_STATE_PERM_UNAVAILABLE 4 + +/* NOTE: element must be a logical real address, and must be pinned */ +long cxl_h_attach_process(u64 unit_address, struct cxl_process_element_hcall *element, + u64 *process_token, u64 *mmio_addr, u64 *mmio_size); + +/** + * cxl_h_detach_process - Detach a process element from a coherent + * platform function. + */ +long cxl_h_detach_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_reset_afu - Perform a reset to the coherent platform function. + */ +long cxl_h_reset_afu(u64 unit_address); + +/** + * cxl_h_suspend_process - Suspend a process from being executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_suspend_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_resume_process - Resume a process to be executed + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_resume_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_read_error_state - Reads the error state of the coherent + * platform function. + * R4 contains the error state + */ +long cxl_h_read_error_state(u64 unit_address, u64 *state); + +/** + * cxl_h_get_afu_err - collect the AFU error buffer + * Parameter1 = byte offset into error buffer to retrieve, valid values + * are between 0 and (ibm,error-buffer-size - 1) + * Parameter2 = 4K aligned real address of error buffer, to be filled in + * Parameter3 = length of error buffer, valid values are 4K or less + */ +long cxl_h_get_afu_err(u64 unit_address, u64 offset, u64 buf_address, u64 len); + +/** + * cxl_h_get_config - collect configuration record for the + * coherent platform function + * Parameter1 = # of configuration record to retrieve, valid values are + * between 0 and (ibm,#config-records - 1) + * Parameter2 = byte offset into configuration record to retrieve, + * valid values are between 0 and (ibm,config-record-size - 1) + * Parameter3 = 4K aligned real address of configuration record buffer, + * to be filled in + * Parameter4 = length of configuration buffer, valid values are 4K or less + */ +long cxl_h_get_config(u64 unit_address, u64 cr_num, u64 offset, + u64 buf_address, u64 len); + +/** + * cxl_h_terminate_process - Terminate the process before completion + * Parameter1 = process-token as returned from H_ATTACH_CA_PROCESS when + * process was attached. + */ +long cxl_h_terminate_process(u64 unit_address, u64 process_token); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = # of VPD record to retrieve, valid values are between 0 + * and (ibm,#config-records - 1). + * Parameter2 = 4K naturally aligned real buffer containing block + * list entries + * Parameter3 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd(u64 unit_address, u64 record, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_get_fn_error_interrupt - Read the function-wide error data based on an interrupt + */ +long cxl_h_get_fn_error_interrupt(u64 unit_address, u64 *reg); + +/** + * cxl_h_ack_fn_error_interrupt - Acknowledge function-wide error data + * based on an interrupt + * Parameter1 = value to write to the function-wide error interrupt register + */ +long cxl_h_ack_fn_error_interrupt(u64 unit_address, u64 value); + +/** + * cxl_h_get_error_log - Retrieve the Platform Log ID (PLID) of + * an error log + */ +long cxl_h_get_error_log(u64 unit_address, u64 value); + +/** + * cxl_h_collect_int_info - Collect interrupt info about a coherent + * platform function after an interrupt occurred. + */ +long cxl_h_collect_int_info(u64 unit_address, u64 process_token, + struct cxl_irq_info *info); + +/** + * cxl_h_control_faults - Control the operation of a coherent platform + * function after a fault occurs. + * + * Parameters + * control-mask: value to control the faults + * looks like PSL_TFC_An shifted >> 32 + * reset-mask: mask to control reset of function faults + * Set reset_mask = 1 to reset PSL errors + */ +long cxl_h_control_faults(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask); + +/** + * cxl_h_reset_adapter - Perform a reset to the coherent platform facility. + */ +long cxl_h_reset_adapter(u64 unit_address); + +/** + * cxl_h_collect_vpd - Collect VPD for the coherent platform function. + * Parameter1 = 4K naturally aligned real buffer containing block + * list entries + * Parameter2 = number of block list entries in the block list, valid + * values are between 0 and 256 + */ +long cxl_h_collect_vpd_adapter(u64 unit_address, u64 list_address, + u64 num, u64 *out); + +/** + * cxl_h_download_adapter_image - Download the base image to the coherent + * platform facility. + */ +long cxl_h_download_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); + +/** + * cxl_h_validate_adapter_image - Validate the base image in the coherent + * platform facility. + */ +long cxl_h_validate_adapter_image(u64 unit_address, + u64 list_address, u64 num, + u64 *out); +#endif /* _HCALLS_H */ diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 09a406058c46..be646dc41a2c 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -19,70 +19,11 @@ #include "cxl.h" #include "trace.h" -/* XXX: This is implementation specific */ -static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u64 errstat) +static int afu_irq_range_start(void) { - u64 fir1, fir2, fir_slice, serr, afu_debug; - - fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); - fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); - serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); - afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); - - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); - dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); - dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); - dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); - dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); - dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); - - dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(ctx->afu->adapter); - - return cxl_ack_irq(ctx, 0, errstat); -} - -irqreturn_t cxl_slice_irq_err(int irq, void *data) -{ - struct cxl_afu *afu = data; - u64 fir_slice, errstat, serr, afu_debug; - - WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); - - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); - fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); - errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); - afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); - dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); - dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); - dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); - dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); - - cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); - - return IRQ_HANDLED; -} - -static irqreturn_t cxl_irq_err(int irq, void *data) -{ - struct cxl *adapter = data; - u64 fir1, fir2, err_ivte; - - WARN(1, "CXL ERROR interrupt %i\n", irq); - - err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); - dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); - - dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(adapter); - - fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); - - dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); - - return IRQ_HANDLED; + if (cpu_has_feature(CPU_FTR_HVMODE)) + return 1; + return 0; } static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 dar) @@ -93,9 +34,8 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da return IRQ_HANDLED; } -static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) +irqreturn_t cxl_irq(int irq, struct cxl_context *ctx, struct cxl_irq_info *irq_info) { - struct cxl_context *ctx = data; u64 dsisr, dar; dsisr = irq_info->dsisr; @@ -145,7 +85,8 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) if (dsisr & CXL_PSL_DSISR_An_UR) pr_devel("CXL interrupt: AURP PTE not found\n"); if (dsisr & CXL_PSL_DSISR_An_PE) - return handle_psl_slice_error(ctx, dsisr, irq_info->errstat); + return cxl_ops->handle_psl_slice_error(ctx, dsisr, + irq_info->errstat); if (dsisr & CXL_PSL_DSISR_An_AE) { pr_devel("CXL interrupt: AFU Error 0x%016llx\n", irq_info->afu_err); @@ -169,7 +110,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) wake_up_all(&ctx->wq); } - cxl_ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_A, 0); return IRQ_HANDLED; } if (dsisr & CXL_PSL_DSISR_An_OC) @@ -179,54 +120,27 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) return IRQ_HANDLED; } -static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info) -{ - if (irq_info->dsisr & CXL_PSL_DSISR_TRANS) - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); - else - cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); - - return IRQ_HANDLED; -} - -static irqreturn_t cxl_irq_multiplexed(int irq, void *data) -{ - struct cxl_afu *afu = data; - struct cxl_context *ctx; - struct cxl_irq_info irq_info; - int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; - int ret; - - if ((ret = cxl_get_irq(afu, &irq_info))) { - WARN(1, "Unable to get CXL IRQ Info: %i\n", ret); - return fail_psl_irq(afu, &irq_info); - } - - rcu_read_lock(); - ctx = idr_find(&afu->contexts_idr, ph); - if (ctx) { - ret = cxl_irq(irq, ctx, &irq_info); - rcu_read_unlock(); - return ret; - } - rcu_read_unlock(); - - WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" - " %016llx\n(Possible AFU HW issue - was a term/remove acked" - " with outstanding transactions?)\n", ph, irq_info.dsisr, - irq_info.dar); - return fail_psl_irq(afu, &irq_info); -} - static irqreturn_t cxl_irq_afu(int irq, void *data) { struct cxl_context *ctx = data; irq_hw_number_t hwirq = irqd_to_hwirq(irq_get_irq_data(irq)); - int irq_off, afu_irq = 1; + int irq_off, afu_irq = 0; __u16 range; int r; - for (r = 1; r < CXL_IRQ_RANGES; r++) { + /* + * Look for the interrupt number. + * On bare-metal, we know range 0 only contains the PSL + * interrupt so we could start counting at range 1 and initialize + * afu_irq at 1. + * In a guest, range 0 also contains AFU interrupts, so it must + * be counted for. Therefore we initialize afu_irq at 0 to take into + * account the PSL interrupt. + * + * For code-readability, it just seems easier to go over all + * the ranges on bare-metal and guest. The end result is the same. + */ + for (r = 0; r < CXL_IRQ_RANGES; r++) { irq_off = hwirq - ctx->irqs.offset[r]; range = ctx->irqs.range[r]; if (irq_off >= 0 && irq_off < range) { @@ -236,7 +150,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) afu_irq += range; } if (unlikely(r >= CXL_IRQ_RANGES)) { - WARN(1, "Recieved AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", + WARN(1, "Received AFU IRQ out of range for pe %i (virq %i hwirq %lx)\n", ctx->pe, irq, hwirq); return IRQ_HANDLED; } @@ -246,7 +160,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) afu_irq, ctx->pe, irq, hwirq); if (unlikely(!ctx->irq_bitmap)) { - WARN(1, "Recieved AFU IRQ for context with no IRQ bitmap\n"); + WARN(1, "Received AFU IRQ for context with no IRQ bitmap\n"); return IRQ_HANDLED; } spin_lock(&ctx->lock); @@ -272,7 +186,8 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, return 0; } - cxl_setup_irq(adapter, hwirq, virq); + if (cxl_ops->setup_irq) + cxl_ops->setup_irq(adapter, hwirq, virq); pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); @@ -291,16 +206,16 @@ void cxl_unmap_irq(unsigned int virq, void *cookie) irq_dispose_mapping(virq); } -static int cxl_register_one_irq(struct cxl *adapter, - irq_handler_t handler, - void *cookie, - irq_hw_number_t *dest_hwirq, - unsigned int *dest_virq, - const char *name) +int cxl_register_one_irq(struct cxl *adapter, + irq_handler_t handler, + void *cookie, + irq_hw_number_t *dest_hwirq, + unsigned int *dest_virq, + const char *name) { int hwirq, virq; - if ((hwirq = cxl_alloc_one_irq(adapter)) < 0) + if ((hwirq = cxl_ops->alloc_one_irq(adapter)) < 0) return hwirq; if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) @@ -312,108 +227,10 @@ static int cxl_register_one_irq(struct cxl *adapter, return 0; err: - cxl_release_one_irq(adapter, hwirq); + cxl_ops->release_one_irq(adapter, hwirq); return -ENOMEM; } -int cxl_register_psl_err_irq(struct cxl *adapter) -{ - int rc; - - adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", - dev_name(&adapter->dev)); - if (!adapter->irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter, - &adapter->err_hwirq, - &adapter->err_virq, - adapter->irq_name))) { - kfree(adapter->irq_name); - adapter->irq_name = NULL; - return rc; - } - - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff); - - return 0; -} - -void cxl_release_psl_err_irq(struct cxl *adapter) -{ - if (adapter->err_virq != irq_find_mapping(NULL, adapter->err_hwirq)) - return; - - cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); - cxl_unmap_irq(adapter->err_virq, adapter); - cxl_release_one_irq(adapter, adapter->err_hwirq); - kfree(adapter->irq_name); -} - -int cxl_register_serr_irq(struct cxl_afu *afu) -{ - u64 serr; - int rc; - - afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", - dev_name(&afu->dev)); - if (!afu->err_irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu, - &afu->serr_hwirq, - &afu->serr_virq, afu->err_irq_name))) { - kfree(afu->err_irq_name); - afu->err_irq_name = NULL; - return rc; - } - - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); - serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); - cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); - - return 0; -} - -void cxl_release_serr_irq(struct cxl_afu *afu) -{ - if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) - return; - - cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); - cxl_unmap_irq(afu->serr_virq, afu); - cxl_release_one_irq(afu->adapter, afu->serr_hwirq); - kfree(afu->err_irq_name); -} - -int cxl_register_psl_irq(struct cxl_afu *afu) -{ - int rc; - - afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", - dev_name(&afu->dev)); - if (!afu->psl_irq_name) - return -ENOMEM; - - if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, - &afu->psl_hwirq, &afu->psl_virq, - afu->psl_irq_name))) { - kfree(afu->psl_irq_name); - afu->psl_irq_name = NULL; - } - return rc; -} - -void cxl_release_psl_irq(struct cxl_afu *afu) -{ - if (afu->psl_virq != irq_find_mapping(NULL, afu->psl_hwirq)) - return; - - cxl_unmap_irq(afu->psl_virq, afu); - cxl_release_one_irq(afu->adapter, afu->psl_hwirq); - kfree(afu->psl_irq_name); -} - void afu_irq_name_free(struct cxl_context *ctx) { struct cxl_irq_name *irq_name, *tmp; @@ -429,16 +246,33 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) { int rc, r, i, j = 1; struct cxl_irq_name *irq_name; + int alloc_count; + + /* + * In native mode, range 0 is reserved for the multiplexed + * PSL interrupt. It has been allocated when the AFU was initialized. + * + * In a guest, the PSL interrupt is not mutliplexed, but per-context, + * and is the first interrupt from range 0. It still needs to be + * allocated, so bump the count by one. + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + alloc_count = count; + else + alloc_count = count + 1; /* Initialize the list head to hold irq names */ INIT_LIST_HEAD(&ctx->irq_names); - if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) + if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, + alloc_count))) return rc; - /* Multiplexed PSL Interrupt */ - ctx->irqs.offset[0] = ctx->afu->psl_hwirq; - ctx->irqs.range[0] = 1; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + /* Multiplexed PSL Interrupt */ + ctx->irqs.offset[0] = ctx->afu->native->psl_hwirq; + ctx->irqs.range[0] = 1; + } ctx->irq_count = count; ctx->irq_bitmap = kcalloc(BITS_TO_LONGS(count), @@ -450,7 +284,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) * Allocate names first. If any fail, bail out before allocating * actual hardware IRQs. */ - for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { for (i = 0; i < ctx->irqs.range[r]; i++) { irq_name = kmalloc(sizeof(struct cxl_irq_name), GFP_KERNEL); @@ -471,7 +305,7 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) return 0; out: - cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); afu_irq_name_free(ctx); return -ENOMEM; } @@ -480,15 +314,30 @@ static void afu_register_hwirqs(struct cxl_context *ctx) { irq_hw_number_t hwirq; struct cxl_irq_name *irq_name; - int r,i; + int r, i; + irqreturn_t (*handler)(int irq, void *data); /* We've allocated all memory now, so let's do the irq allocations */ irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); - for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { hwirq = ctx->irqs.offset[r]; for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { - cxl_map_irq(ctx->afu->adapter, hwirq, - cxl_irq_afu, ctx, irq_name->name); + if (r == 0 && i == 0) + /* + * The very first interrupt of range 0 is + * always the PSL interrupt, but we only + * need to connect a handler for guests, + * because there's one PSL interrupt per + * context. + * On bare-metal, the PSL interrupt is + * multiplexed and was setup when the AFU + * was configured. + */ + handler = cxl_ops->psl_interrupt; + else + handler = cxl_irq_afu; + cxl_map_irq(ctx->afu->adapter, hwirq, handler, ctx, + irq_name->name); irq_name = list_next_entry(irq_name, list); } } @@ -504,7 +353,7 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) afu_register_hwirqs(ctx); return 0; - } +} void afu_release_irqs(struct cxl_context *ctx, void *cookie) { @@ -512,7 +361,7 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) unsigned int virq; int r, i; - for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (r = afu_irq_range_start(); r < CXL_IRQ_RANGES; r++) { hwirq = ctx->irqs.offset[r]; for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { virq = irq_find_mapping(NULL, hwirq); @@ -522,7 +371,7 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) } afu_irq_name_free(ctx); - cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); + cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter); ctx->irq_count = 0; } diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 9fde75ed4fac..ae68c3201156 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -32,6 +32,29 @@ uint cxl_verbose; module_param_named(verbose, cxl_verbose, uint, 0600); MODULE_PARM_DESC(verbose, "Enable verbose dmesg output"); +const struct cxl_backend_ops *cxl_ops; + +int cxl_afu_slbia(struct cxl_afu *afu) +{ + unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + + pr_devel("cxl_afu_slbia issuing SLBIA command\n"); + cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); + while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { + if (time_after_eq(jiffies, timeout)) { + dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); + return -EBUSY; + } + /* If the adapter has gone down, we can assume that we + * will PERST it and that will invalidate everything. + */ + if (!cxl_ops->link_ok(afu->adapter, afu)) + return -EIO; + cpu_relax(); + } + return 0; +} + static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) { struct task_struct *task; @@ -139,6 +162,32 @@ int cxl_alloc_sst(struct cxl_context *ctx) return 0; } +/* print buffer content as integers when debugging */ +void cxl_dump_debug_buffer(void *buf, size_t buf_len) +{ +#ifdef DEBUG + int i, *ptr; + + /* + * We want to regroup up to 4 integers per line, which means they + * need to be in the same pr_devel() statement + */ + ptr = (int *) buf; + for (i = 0; i * 4 < buf_len; i += 4) { + if ((i + 3) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2], ptr[i + 3]); + else if ((i + 2) * 4 < buf_len) + pr_devel("%.8x %.8x %.8x\n", ptr[i], ptr[i + 1], + ptr[i + 2]); + else if ((i + 1) * 4 < buf_len) + pr_devel("%.8x %.8x\n", ptr[i], ptr[i + 1]); + else + pr_devel("%.8x\n", ptr[i]); + } +#endif /* DEBUG */ +} + /* Find a CXL adapter by it's number and increase it's refcount */ struct cxl *get_cxl_adapter(int num) { @@ -152,7 +201,7 @@ struct cxl *get_cxl_adapter(int num) return adapter; } -int cxl_alloc_adapter_nr(struct cxl *adapter) +static int cxl_alloc_adapter_nr(struct cxl *adapter) { int i; @@ -174,13 +223,58 @@ void cxl_remove_adapter_nr(struct cxl *adapter) idr_remove(&cxl_adapter_idr, adapter->adapter_num); } +struct cxl *cxl_alloc_adapter(void) +{ + struct cxl *adapter; + + if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) + return NULL; + + spin_lock_init(&adapter->afu_list_lock); + + if (cxl_alloc_adapter_nr(adapter)) + goto err1; + + if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) + goto err2; + + return adapter; + +err2: + cxl_remove_adapter_nr(adapter); +err1: + kfree(adapter); + return NULL; +} + +struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) +{ + struct cxl_afu *afu; + + if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) + return NULL; + + afu->adapter = adapter; + afu->dev.parent = &adapter->dev; + afu->dev.release = cxl_ops->release_afu; + afu->slice = slice; + idr_init(&afu->contexts_idr); + mutex_init(&afu->contexts_lock); + spin_lock_init(&afu->afu_cntl_lock); + + afu->prefault_mode = CXL_PREFAULT_NONE; + afu->irqs_max = afu->adapter->user_irqs; + + return afu; +} + int cxl_afu_select_best_mode(struct cxl_afu *afu) { if (afu->modes_supported & CXL_MODE_DIRECTED) - return cxl_afu_activate_mode(afu, CXL_MODE_DIRECTED); + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DIRECTED); if (afu->modes_supported & CXL_MODE_DEDICATED) - return cxl_afu_activate_mode(afu, CXL_MODE_DEDICATED); + return cxl_ops->afu_activate_mode(afu, CXL_MODE_DEDICATED); dev_warn(&afu->dev, "No supported programming modes available\n"); /* We don't fail this so the user can inspect sysfs */ @@ -191,9 +285,6 @@ static int __init init_cxl(void) { int rc = 0; - if (!cpu_has_feature(CPU_FTR_HVMODE)) - return -EPERM; - if ((rc = cxl_file_init())) return rc; @@ -202,7 +293,17 @@ static int __init init_cxl(void) if ((rc = register_cxl_calls(&cxl_calls))) goto err; - if ((rc = pci_register_driver(&cxl_pci_driver))) + if (cpu_has_feature(CPU_FTR_HVMODE)) { + cxl_ops = &cxl_native_ops; + rc = pci_register_driver(&cxl_pci_driver); + } +#ifdef CONFIG_PPC_PSERIES + else { + cxl_ops = &cxl_guest_ops; + rc = platform_driver_register(&cxl_of_driver); + } +#endif + if (rc) goto err1; return 0; @@ -217,7 +318,12 @@ err: static void exit_cxl(void) { - pci_unregister_driver(&cxl_pci_driver); + if (cpu_has_feature(CPU_FTR_HVMODE)) + pci_unregister_driver(&cxl_pci_driver); +#ifdef CONFIG_PPC_PSERIES + else + platform_driver_unregister(&cxl_of_driver); +#endif cxl_debugfs_exit(); cxl_file_exit(); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index f40909793490..387fcbdf9793 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -42,7 +42,7 @@ static int afu_control(struct cxl_afu *afu, u64 command, goto out; } - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { afu->enabled = enabled; rc = -EIO; goto out; @@ -80,7 +80,7 @@ int cxl_afu_disable(struct cxl_afu *afu) } /* This will disable as well as reset */ -int __cxl_afu_reset(struct cxl_afu *afu) +static int native_afu_reset(struct cxl_afu *afu) { pr_devel("AFU reset request\n"); @@ -90,9 +90,9 @@ int __cxl_afu_reset(struct cxl_afu *afu) false); } -int cxl_afu_check_and_enable(struct cxl_afu *afu) +static int native_afu_check_and_enable(struct cxl_afu *afu) { - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { WARN(1, "Refusing to enable afu while link down!\n"); return -EIO; } @@ -114,7 +114,7 @@ int cxl_psl_purge(struct cxl_afu *afu) pr_devel("PSL purge request\n"); - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n"); rc = -EIO; goto out; @@ -136,7 +136,7 @@ int cxl_psl_purge(struct cxl_afu *afu) rc = -EBUSY; goto out; } - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { rc = -EIO; goto out; } @@ -186,22 +186,22 @@ static int spa_max_procs(int spa_size) int cxl_alloc_spa(struct cxl_afu *afu) { /* Work out how many pages to allocate */ - afu->spa_order = 0; + afu->native->spa_order = 0; do { - afu->spa_order++; - afu->spa_size = (1 << afu->spa_order) * PAGE_SIZE; - afu->spa_max_procs = spa_max_procs(afu->spa_size); - } while (afu->spa_max_procs < afu->num_procs); + afu->native->spa_order++; + afu->native->spa_size = (1 << afu->native->spa_order) * PAGE_SIZE; + afu->native->spa_max_procs = spa_max_procs(afu->native->spa_size); + } while (afu->native->spa_max_procs < afu->num_procs); - WARN_ON(afu->spa_size > 0x100000); /* Max size supported by the hardware */ + WARN_ON(afu->native->spa_size > 0x100000); /* Max size supported by the hardware */ - if (!(afu->spa = (struct cxl_process_element *) - __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->spa_order))) { + if (!(afu->native->spa = (struct cxl_process_element *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, afu->native->spa_order))) { pr_err("cxl_alloc_spa: Unable to allocate scheduled process area\n"); return -ENOMEM; } pr_devel("spa pages: %i afu->spa_max_procs: %i afu->num_procs: %i\n", - 1<<afu->spa_order, afu->spa_max_procs, afu->num_procs); + 1<<afu->native->spa_order, afu->native->spa_max_procs, afu->num_procs); return 0; } @@ -210,13 +210,15 @@ static void attach_spa(struct cxl_afu *afu) { u64 spap; - afu->sw_command_status = (__be64 *)((char *)afu->spa + - ((afu->spa_max_procs + 3) * 128)); + afu->native->sw_command_status = (__be64 *)((char *)afu->native->spa + + ((afu->native->spa_max_procs + 3) * 128)); - spap = virt_to_phys(afu->spa) & CXL_PSL_SPAP_Addr; - spap |= ((afu->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; + spap = virt_to_phys(afu->native->spa) & CXL_PSL_SPAP_Addr; + spap |= ((afu->native->spa_size >> (12 - CXL_PSL_SPAP_Size_Shift)) - 1) & CXL_PSL_SPAP_Size; spap |= CXL_PSL_SPAP_V; - pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", afu->spa, afu->spa_max_procs, afu->sw_command_status, spap); + pr_devel("cxl: SPA allocated at 0x%p. Max processes: %i, sw_command_status: 0x%p CXL_PSL_SPAP_An=0x%016llx\n", + afu->native->spa, afu->native->spa_max_procs, + afu->native->sw_command_status, spap); cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); } @@ -227,9 +229,10 @@ static inline void detach_spa(struct cxl_afu *afu) void cxl_release_spa(struct cxl_afu *afu) { - if (afu->spa) { - free_pages((unsigned long) afu->spa, afu->spa_order); - afu->spa = NULL; + if (afu->native->spa) { + free_pages((unsigned long) afu->native->spa, + afu->native->spa_order); + afu->native->spa = NULL; } } @@ -247,7 +250,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n"); return -EBUSY; } - if (!cxl_adapter_link_ok(adapter)) + if (!cxl_ops->link_ok(adapter, NULL)) return -EIO; cpu_relax(); } @@ -258,28 +261,7 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter) dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n"); return -EBUSY; } - if (!cxl_adapter_link_ok(adapter)) - return -EIO; - cpu_relax(); - } - return 0; -} - -int cxl_afu_slbia(struct cxl_afu *afu) -{ - unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); - - pr_devel("cxl_afu_slbia issuing SLBIA command\n"); - cxl_p2n_write(afu, CXL_SLBIA_An, CXL_TLB_SLB_IQ_ALL); - while (cxl_p2n_read(afu, CXL_SLBIA_An) & CXL_TLB_SLB_P) { - if (time_after_eq(jiffies, timeout)) { - dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n"); - return -EBUSY; - } - /* If the adapter has gone down, we can assume that we - * will PERST it and that will invalidate everything. - */ - if (!cxl_adapter_link_ok(afu->adapter)) + if (!cxl_ops->link_ok(adapter, NULL)) return -EIO; cpu_relax(); } @@ -312,7 +294,7 @@ static void slb_invalid(struct cxl_context *ctx) struct cxl *adapter = ctx->afu->adapter; u64 slbia; - WARN_ON(!mutex_is_locked(&ctx->afu->spa_mutex)); + WARN_ON(!mutex_is_locked(&ctx->afu->native->spa_mutex)); cxl_p1_write(adapter, CXL_PSL_LBISEL, ((u64)be32_to_cpu(ctx->elem->common.pid) << 32) | @@ -320,7 +302,7 @@ static void slb_invalid(struct cxl_context *ctx) cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID); while (1) { - if (!cxl_adapter_link_ok(adapter)) + if (!cxl_ops->link_ok(adapter, NULL)) break; slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA); if (!(slbia & CXL_TLB_SLB_P)) @@ -342,7 +324,7 @@ static int do_process_element_cmd(struct cxl_context *ctx, ctx->elem->software_state = cpu_to_be32(pe_state); smp_wmb(); - *(ctx->afu->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); + *(ctx->afu->native->sw_command_status) = cpu_to_be64(cmd | 0 | ctx->pe); smp_mb(); cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe); while (1) { @@ -351,12 +333,12 @@ static int do_process_element_cmd(struct cxl_context *ctx, rc = -EBUSY; goto out; } - if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n"); rc = -EIO; goto out; } - state = be64_to_cpup(ctx->afu->sw_command_status); + state = be64_to_cpup(ctx->afu->native->sw_command_status); if (state == ~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); rc = -1; @@ -384,12 +366,12 @@ static int add_process_element(struct cxl_context *ctx) { int rc = 0; - mutex_lock(&ctx->afu->spa_mutex); + mutex_lock(&ctx->afu->native->spa_mutex); pr_devel("%s Adding pe: %i started\n", __func__, ctx->pe); if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_ADD, CXL_PE_SOFTWARE_STATE_V))) ctx->pe_inserted = true; pr_devel("%s Adding pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->spa_mutex); + mutex_unlock(&ctx->afu->native->spa_mutex); return rc; } @@ -401,18 +383,18 @@ static int terminate_process_element(struct cxl_context *ctx) if (!(ctx->elem->software_state & cpu_to_be32(CXL_PE_SOFTWARE_STATE_V))) return rc; - mutex_lock(&ctx->afu->spa_mutex); + mutex_lock(&ctx->afu->native->spa_mutex); pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe); /* We could be asked to terminate when the hw is down. That * should always succeed: it's not running if the hw has gone * away and is being reset. */ - if (cxl_adapter_link_ok(ctx->afu->adapter)) + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE, CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T); ctx->elem->software_state = 0; /* Remove Valid bit */ pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->spa_mutex); + mutex_unlock(&ctx->afu->native->spa_mutex); return rc; } @@ -420,20 +402,20 @@ static int remove_process_element(struct cxl_context *ctx) { int rc = 0; - mutex_lock(&ctx->afu->spa_mutex); + mutex_lock(&ctx->afu->native->spa_mutex); pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe); /* We could be asked to remove when the hw is down. Again, if * the hw is down, the PE is gone, so we succeed. */ - if (cxl_adapter_link_ok(ctx->afu->adapter)) + if (cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0); if (!rc) ctx->pe_inserted = false; slb_invalid(ctx); pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe); - mutex_unlock(&ctx->afu->spa_mutex); + mutex_unlock(&ctx->afu->native->spa_mutex); return rc; } @@ -446,7 +428,7 @@ void cxl_assign_psn_space(struct cxl_context *ctx) ctx->psn_size = ctx->afu->adapter->ps_size; } else { ctx->psn_phys = ctx->afu->psn_phys + - (ctx->afu->pp_offset + ctx->afu->pp_size * ctx->pe); + (ctx->afu->native->pp_offset + ctx->afu->pp_size * ctx->pe); ctx->psn_size = ctx->afu->pp_size; } } @@ -458,7 +440,7 @@ static int activate_afu_directed(struct cxl_afu *afu) dev_info(&afu->dev, "Activating AFU directed mode\n"); afu->num_procs = afu->max_procs_virtualised; - if (afu->spa == NULL) { + if (afu->native->spa == NULL) { if (cxl_alloc_spa(afu)) return -ENOMEM; } @@ -552,7 +534,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) ctx->elem->common.wed = cpu_to_be64(wed); /* first guy needs to enable */ - if ((result = cxl_afu_check_and_enable(ctx->afu))) + if ((result = cxl_ops->afu_check_and_enable(ctx->afu))) return result; return add_process_element(ctx); @@ -568,7 +550,7 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_sysfs_afu_m_remove(afu); cxl_chardev_afu_remove(afu); - __cxl_afu_reset(afu); + cxl_ops->afu_reset(afu); cxl_afu_disable(afu); cxl_psl_purge(afu); @@ -632,7 +614,7 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) /* master only context for dedicated */ cxl_assign_psn_space(ctx); - if ((rc = __cxl_afu_reset(afu))) + if ((rc = cxl_ops->afu_reset(afu))) return rc; cxl_p2n_write(afu, CXL_PSL_WED_An, wed); @@ -652,7 +634,7 @@ static int deactivate_dedicated_process(struct cxl_afu *afu) return 0; } -int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode) +static int native_afu_deactivate_mode(struct cxl_afu *afu, int mode) { if (mode == CXL_MODE_DIRECTED) return deactivate_afu_directed(afu); @@ -661,19 +643,14 @@ int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode) return 0; } -int cxl_afu_deactivate_mode(struct cxl_afu *afu) -{ - return _cxl_afu_deactivate_mode(afu, afu->current_mode); -} - -int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) +static int native_afu_activate_mode(struct cxl_afu *afu, int mode) { if (!mode) return 0; if (!(mode & afu->modes_supported)) return -EINVAL; - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { WARN(1, "Device link is down, refusing to activate!\n"); return -EIO; } @@ -686,9 +663,10 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode) return -EINVAL; } -int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) +static int native_attach_process(struct cxl_context *ctx, bool kernel, + u64 wed, u64 amr) { - if (!cxl_adapter_link_ok(ctx->afu->adapter)) { + if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { WARN(1, "Device link is down, refusing to attach process!\n"); return -EIO; } @@ -705,7 +683,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) static inline int detach_process_native_dedicated(struct cxl_context *ctx) { - __cxl_afu_reset(ctx->afu); + cxl_ops->afu_reset(ctx->afu); cxl_afu_disable(ctx->afu); cxl_psl_purge(ctx->afu); return 0; @@ -723,7 +701,7 @@ static inline int detach_process_native_afu_directed(struct cxl_context *ctx) return 0; } -int cxl_detach_process(struct cxl_context *ctx) +static int native_detach_process(struct cxl_context *ctx) { trace_cxl_detach(ctx); @@ -733,14 +711,14 @@ int cxl_detach_process(struct cxl_context *ctx) return detach_process_native_afu_directed(ctx); } -int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) +static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; /* If the adapter has gone away, we can't get any meaningful * information. */ - if (!cxl_adapter_link_ok(afu->adapter)) + if (!cxl_ops->link_ok(afu->adapter, afu)) return -EIO; info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); @@ -751,10 +729,214 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) info->tid = pidtid & 0xffffffff; info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + info->proc_handle = 0; + + return 0; +} + +static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, + u64 dsisr, u64 errstat) +{ + u64 fir1, fir2, fir_slice, serr, afu_debug; + + fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); + fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); + dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); + dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); + dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(ctx->afu->adapter); + + return cxl_ops->ack_irq(ctx, 0, errstat); +} + +static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info) +{ + if (irq_info->dsisr & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + + return IRQ_HANDLED; +} + +static irqreturn_t native_irq_multiplexed(int irq, void *data) +{ + struct cxl_afu *afu = data; + struct cxl_context *ctx; + struct cxl_irq_info irq_info; + int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; + int ret; + + if ((ret = native_get_irq_info(afu, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", ret); + return fail_psl_irq(afu, &irq_info); + } + + rcu_read_lock(); + ctx = idr_find(&afu->contexts_idr, ph); + if (ctx) { + ret = cxl_irq(irq, ctx, &irq_info); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); + + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %016llx DAR" + " %016llx\n(Possible AFU HW issue - was a term/remove acked" + " with outstanding transactions?)\n", ph, irq_info.dsisr, + irq_info.dar); + return fail_psl_irq(afu, &irq_info); +} + +static irqreturn_t native_slice_irq_err(int irq, void *data) +{ + struct cxl_afu *afu = data; + u64 fir_slice, errstat, serr, afu_debug; + + WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); + errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); + afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); + dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); + dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + return IRQ_HANDLED; +} + +static irqreturn_t native_irq_err(int irq, void *data) +{ + struct cxl *adapter = data; + u64 fir1, fir2, err_ivte; + + WARN(1, "CXL ERROR interrupt %i\n", irq); + + err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); + dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); + + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + cxl_stop_trace(adapter); + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + + dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); + + return IRQ_HANDLED; +} + +int cxl_native_register_psl_err_irq(struct cxl *adapter) +{ + int rc; + + adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&adapter->dev)); + if (!adapter->irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(adapter, native_irq_err, adapter, + &adapter->native->err_hwirq, + &adapter->native->err_virq, + adapter->irq_name))) { + kfree(adapter->irq_name); + adapter->irq_name = NULL; + return rc; + } + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->native->err_hwirq & 0xffff); + + return 0; +} + +void cxl_native_release_psl_err_irq(struct cxl *adapter) +{ + if (adapter->native->err_virq != irq_find_mapping(NULL, adapter->native->err_hwirq)) + return; + + cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); + cxl_unmap_irq(adapter->native->err_virq, adapter); + cxl_ops->release_one_irq(adapter, adapter->native->err_hwirq); + kfree(adapter->irq_name); +} + +int cxl_native_register_serr_irq(struct cxl_afu *afu) +{ + u64 serr; + int rc; + + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_slice_irq_err, afu, + &afu->serr_hwirq, + &afu->serr_virq, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; + return rc; + } + + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); return 0; } +void cxl_native_release_serr_irq(struct cxl_afu *afu) +{ + if (afu->serr_virq != irq_find_mapping(NULL, afu->serr_hwirq)) + return; + + cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); + cxl_unmap_irq(afu->serr_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); +} + +int cxl_native_register_psl_irq(struct cxl_afu *afu) +{ + int rc; + + afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", + dev_name(&afu->dev)); + if (!afu->psl_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, native_irq_multiplexed, + afu, &afu->native->psl_hwirq, &afu->native->psl_virq, + afu->psl_irq_name))) { + kfree(afu->psl_irq_name); + afu->psl_irq_name = NULL; + } + return rc; +} + +void cxl_native_release_psl_irq(struct cxl_afu *afu) +{ + if (afu->native->psl_virq != irq_find_mapping(NULL, afu->native->psl_hwirq)) + return; + + cxl_unmap_irq(afu->native->psl_virq, afu); + cxl_ops->release_one_irq(afu->adapter, afu->native->psl_hwirq); + kfree(afu->psl_irq_name); +} + static void recover_psl_err(struct cxl_afu *afu, u64 errstat) { u64 dsisr; @@ -769,7 +951,7 @@ static void recover_psl_err(struct cxl_afu *afu, u64 errstat) cxl_p2n_write(afu, CXL_PSL_ErrStat_An, errstat); } -int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) +static int native_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) { trace_cxl_psl_irq_ack(ctx, tfc); if (tfc) @@ -784,3 +966,132 @@ int cxl_check_error(struct cxl_afu *afu) { return (cxl_p1n_read(afu, CXL_PSL_SCNTL_An) == ~0ULL); } + +static bool native_support_attributes(const char *attr_name, + enum cxl_attrs type) +{ + return true; +} + +static int native_afu_cr_read64(struct cxl_afu *afu, int cr, u64 off, u64 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le64(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read32(struct cxl_afu *afu, int cr, u64 off, u32 *out) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + *out = in_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off); + return 0; +} + +static int native_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off, u16 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xffff; + return rc; +} + +static int native_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off, u8 *out) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val); + if (!rc) + *out = (val >> ((off & 0x3) * 8)) & 0xff; + return rc; +} + +static int native_afu_cr_write32(struct cxl_afu *afu, int cr, u64 off, u32 in) +{ + if (unlikely(!cxl_ops->link_ok(afu->adapter, afu))) + return -EIO; + if (unlikely(off >= afu->crs_len)) + return -ERANGE; + out_le32(afu->native->afu_desc_mmio + afu->crs_offset + + (cr * afu->crs_len) + off, in); + return 0; +} + +static int native_afu_cr_write16(struct cxl_afu *afu, int cr, u64 off, u16 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + WARN_ON(shift == 24); + mask = 0xffff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +static int native_afu_cr_write8(struct cxl_afu *afu, int cr, u64 off, u8 in) +{ + u64 aligned_off = off & ~0x3L; + u32 val32, mask, shift; + int rc; + + rc = native_afu_cr_read32(afu, cr, aligned_off, &val32); + if (rc) + return rc; + shift = (off & 0x3) * 8; + mask = 0xff << shift; + val32 = (val32 & ~mask) | (in << shift); + + rc = native_afu_cr_write32(afu, cr, aligned_off, val32); + return rc; +} + +const struct cxl_backend_ops cxl_native_ops = { + .module = THIS_MODULE, + .adapter_reset = cxl_pci_reset, + .alloc_one_irq = cxl_pci_alloc_one_irq, + .release_one_irq = cxl_pci_release_one_irq, + .alloc_irq_ranges = cxl_pci_alloc_irq_ranges, + .release_irq_ranges = cxl_pci_release_irq_ranges, + .setup_irq = cxl_pci_setup_irq, + .handle_psl_slice_error = native_handle_psl_slice_error, + .psl_interrupt = NULL, + .ack_irq = native_ack_irq, + .attach_process = native_attach_process, + .detach_process = native_detach_process, + .support_attributes = native_support_attributes, + .link_ok = cxl_adapter_link_ok, + .release_afu = cxl_pci_release_afu, + .afu_read_err_buffer = cxl_pci_afu_read_err_buffer, + .afu_check_and_enable = native_afu_check_and_enable, + .afu_activate_mode = native_afu_activate_mode, + .afu_deactivate_mode = native_afu_deactivate_mode, + .afu_reset = native_afu_reset, + .afu_cr_read8 = native_afu_cr_read8, + .afu_cr_read16 = native_afu_cr_read16, + .afu_cr_read32 = native_afu_cr_read32, + .afu_cr_read64 = native_afu_cr_read64, + .afu_cr_write8 = native_afu_cr_write8, + .afu_cr_write16 = native_afu_cr_write16, + .afu_cr_write32 = native_afu_cr_write32, + .read_adapter_vpd = cxl_pci_read_adapter_vpd, +}; diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c new file mode 100644 index 000000000000..edc458395f68 --- /dev/null +++ b/drivers/misc/cxl/of.c @@ -0,0 +1,513 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> + +#include "cxl.h" + + +static const __be32 *read_prop_string(const struct device_node *np, + const char *prop_name) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (cxl_verbose && prop) + pr_info("%s: %s\n", prop_name, (char *) prop); + return prop; +} + +static const __be32 *read_prop_dword(const struct device_node *np, + const char *prop_name, u32 *val) +{ + const __be32 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be32_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#x (%u)\n", prop_name, *val, *val); + return prop; +} + +static const __be64 *read_prop64_dword(const struct device_node *np, + const char *prop_name, u64 *val) +{ + const __be64 *prop; + + prop = of_get_property(np, prop_name, NULL); + if (prop) + *val = be64_to_cpu(prop[0]); + if (cxl_verbose && prop) + pr_info("%s: %#llx (%llu)\n", prop_name, *val, *val); + return prop; +} + + +static int read_handle(struct device_node *np, u64 *handle) +{ + const __be32 *prop; + u64 size; + + /* Get address and size of the node */ + prop = of_get_address(np, 0, &size, NULL); + if (size) + return -EINVAL; + + /* Helper to read a big number; size is in cells (not bytes) */ + *handle = of_read_number(prop, of_n_addr_cells(np)); + return 0; +} + +static int read_phys_addr(struct device_node *np, char *prop_name, + struct cxl_afu *afu) +{ + int i, len, entry_size, naddr, nsize, type; + u64 addr, size; + const __be32 *prop; + + naddr = of_n_addr_cells(np); + nsize = of_n_size_cells(np); + + prop = of_get_property(np, prop_name, &len); + if (prop) { + entry_size = naddr + nsize; + for (i = 0; i < (len / 4); i += entry_size, prop += entry_size) { + type = be32_to_cpu(prop[0]); + addr = of_read_number(prop, naddr); + size = of_read_number(&prop[naddr], nsize); + switch (type) { + case 0: /* unit address */ + afu->guest->handle = addr; + break; + case 1: /* p2 area */ + afu->guest->p2n_phys += addr; + afu->guest->p2n_size = size; + break; + case 2: /* problem state area */ + afu->psn_phys += addr; + afu->adapter->ps_size = size; + break; + default: + pr_err("Invalid address type %d found in %s property of AFU\n", + type, prop_name); + return -EINVAL; + } + if (cxl_verbose) + pr_info("%s: %#x %#llx (size %#llx)\n", + prop_name, type, addr, size); + } + } + return 0; +} + +static int read_vpd(struct cxl *adapter, struct cxl_afu *afu) +{ + char vpd[256]; + int rc; + size_t len = sizeof(vpd); + + memset(vpd, 0, len); + + if (adapter) + rc = cxl_guest_read_adapter_vpd(adapter, vpd, len); + else + rc = cxl_guest_read_afu_vpd(afu, vpd, len); + + if (rc > 0) { + cxl_dump_debug_buffer(vpd, rc); + rc = 0; + } + return rc; +} + +int cxl_of_read_afu_handle(struct cxl_afu *afu, struct device_node *afu_np) +{ + if (read_handle(afu_np, &afu->guest->handle)) + return -EINVAL; + pr_devel("AFU handle: 0x%.16llx\n", afu->guest->handle); + + return 0; +} + +int cxl_of_read_afu_properties(struct cxl_afu *afu, struct device_node *np) +{ + int i, len, rc; + char *p; + const __be32 *prop; + u16 device_id, vendor_id; + u32 val = 0, class_code; + + /* Properties are read in the same order as listed in PAPR */ + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-function' node properties:\n"); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + } + + rc = read_phys_addr(np, "reg", afu); + if (rc) + return rc; + + rc = read_phys_addr(np, "assigned-addresses", afu); + if (rc) + return rc; + + if (afu->psn_phys == 0) + afu->psa = false; + else + afu->psa = true; + + if (cxl_verbose) { + read_prop_string(np, "ibm,loc-code"); + read_prop_string(np, "device_type"); + } + + read_prop_dword(np, "ibm,#processes", &afu->max_procs_virtualised); + + if (cxl_verbose) { + read_prop_dword(np, "ibm,scratchpad-size", &val); + read_prop_dword(np, "ibm,programmable", &val); + read_prop_string(np, "ibm,phandle"); + read_vpd(NULL, afu); + } + + read_prop_dword(np, "ibm,max-ints-per-process", &afu->guest->max_ints); + afu->irqs_max = afu->guest->max_ints; + + prop = read_prop_dword(np, "ibm,min-ints-per-process", &afu->pp_irqs); + if (prop) { + /* One extra interrupt for the PSL interrupt is already + * included. Remove it now to keep only AFU interrupts and + * match the native case. + */ + afu->pp_irqs--; + } + + if (cxl_verbose) { + read_prop_dword(np, "ibm,max-ints", &val); + read_prop_dword(np, "ibm,vpd-size", &val); + } + + read_prop64_dword(np, "ibm,error-buffer-size", &afu->eb_len); + afu->eb_offset = 0; + + if (cxl_verbose) + read_prop_dword(np, "ibm,config-record-type", &val); + + read_prop64_dword(np, "ibm,config-record-size", &afu->crs_len); + afu->crs_offset = 0; + + read_prop_dword(np, "ibm,#config-records", &afu->crs_num); + + if (cxl_verbose) { + for (i = 0; i < afu->crs_num; i++) { + rc = cxl_ops->afu_cr_read16(afu, i, PCI_DEVICE_ID, + &device_id); + if (!rc) + pr_info("record %d - device-id: %#x\n", + i, device_id); + rc = cxl_ops->afu_cr_read16(afu, i, PCI_VENDOR_ID, + &vendor_id); + if (!rc) + pr_info("record %d - vendor-id: %#x\n", + i, vendor_id); + rc = cxl_ops->afu_cr_read32(afu, i, PCI_CLASS_REVISION, + &class_code); + if (!rc) { + class_code >>= 8; + pr_info("record %d - class-code: %#x\n", + i, class_code); + } + } + + read_prop_dword(np, "ibm,function-number", &val); + read_prop_dword(np, "ibm,privileged-function", &val); + read_prop_dword(np, "vendor-id", &val); + read_prop_dword(np, "device-id", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + read_prop_dword(np, "subsystem-vendor-id", &val); + read_prop_dword(np, "subsystem-id", &val); + } + /* + * if "ibm,process-mmio" doesn't exist then per-process mmio is + * not supported + */ + val = 0; + prop = read_prop_dword(np, "ibm,process-mmio", &val); + if (prop && val == 1) + afu->pp_psa = true; + else + afu->pp_psa = false; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,supports-aur", &val); + read_prop_dword(np, "ibm,supports-csrp", &val); + read_prop_dword(np, "ibm,supports-prr", &val); + } + + prop = read_prop_dword(np, "ibm,function-error-interrupt", &val); + if (prop) + afu->serr_hwirq = val; + + pr_devel("AFU handle: %#llx\n", afu->guest->handle); + pr_devel("p2n_phys: %#llx (size %#llx)\n", + afu->guest->p2n_phys, afu->guest->p2n_size); + pr_devel("psn_phys: %#llx (size %#llx)\n", + afu->psn_phys, afu->adapter->ps_size); + pr_devel("Max number of processes virtualised=%i\n", + afu->max_procs_virtualised); + pr_devel("Per-process irqs min=%i, max=%i\n", afu->pp_irqs, + afu->irqs_max); + pr_devel("Slice error interrupt=%#lx\n", afu->serr_hwirq); + + return 0; +} + +static int read_adapter_irq_config(struct cxl *adapter, struct device_node *np) +{ + const __be32 *ranges; + int len, nranges, i; + struct irq_avail *cur; + + ranges = of_get_property(np, "interrupt-ranges", &len); + if (ranges == NULL || len < (2 * sizeof(int))) + return -EINVAL; + + /* + * encoded array of two cells per entry, each cell encoded as + * with encode-int + */ + nranges = len / (2 * sizeof(int)); + if (nranges == 0 || (nranges * 2 * sizeof(int)) != len) + return -EINVAL; + + adapter->guest->irq_avail = kzalloc(nranges * sizeof(struct irq_avail), + GFP_KERNEL); + if (adapter->guest->irq_avail == NULL) + return -ENOMEM; + + adapter->guest->irq_base_offset = be32_to_cpu(ranges[0]); + for (i = 0; i < nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + cur->offset = be32_to_cpu(ranges[i * 2]); + cur->range = be32_to_cpu(ranges[i * 2 + 1]); + cur->bitmap = kcalloc(BITS_TO_LONGS(cur->range), + sizeof(*cur->bitmap), GFP_KERNEL); + if (cur->bitmap == NULL) + goto err; + if (cur->offset < adapter->guest->irq_base_offset) + adapter->guest->irq_base_offset = cur->offset; + if (cxl_verbose) + pr_info("available IRQ range: %#lx-%#lx (%lu)\n", + cur->offset, cur->offset + cur->range - 1, + cur->range); + } + adapter->guest->irq_nranges = nranges; + spin_lock_init(&adapter->guest->irq_alloc_lock); + + return 0; +err: + for (i--; i >= 0; i--) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); + adapter->guest->irq_avail = NULL; + return -ENOMEM; +} + +int cxl_of_read_adapter_handle(struct cxl *adapter, struct device_node *np) +{ + if (read_handle(np, &adapter->guest->handle)) + return -EINVAL; + pr_devel("Adapter handle: 0x%.16llx\n", adapter->guest->handle); + + return 0; +} + +int cxl_of_read_adapter_properties(struct cxl *adapter, struct device_node *np) +{ + int rc, len, naddr, i; + char *p; + const __be32 *prop; + u32 val = 0; + + /* Properties are read in the same order as listed in PAPR */ + + naddr = of_n_addr_cells(np); + + if (cxl_verbose) { + pr_info("Dump of the 'ibm,coherent-platform-facility' node properties:\n"); + + read_prop_dword(np, "#address-cells", &val); + read_prop_dword(np, "#size-cells", &val); + + prop = of_get_property(np, "compatible", &len); + i = 0; + while (i < len) { + p = (char *) prop + i; + pr_info("compatible: %s\n", p); + i += strlen(p) + 1; + } + read_prop_string(np, "name"); + read_prop_string(np, "model"); + + prop = of_get_property(np, "reg", NULL); + if (prop) { + pr_info("reg: addr:%#llx size:%#x\n", + of_read_number(prop, naddr), + be32_to_cpu(prop[naddr])); + } + + read_prop_string(np, "ibm,loc-code"); + } + + if ((rc = read_adapter_irq_config(adapter, np))) + return rc; + + if (cxl_verbose) { + read_prop_string(np, "device_type"); + read_prop_string(np, "ibm,phandle"); + } + + prop = read_prop_dword(np, "ibm,caia-version", &val); + if (prop) { + adapter->caia_major = (val & 0xFF00) >> 8; + adapter->caia_minor = val & 0xFF; + } + + prop = read_prop_dword(np, "ibm,psl-revision", &val); + if (prop) + adapter->psl_rev = val; + + prop = read_prop_string(np, "status"); + if (prop) { + adapter->guest->status = kasprintf(GFP_KERNEL, "%s", (char *) prop); + if (adapter->guest->status == NULL) + return -ENOMEM; + } + + prop = read_prop_dword(np, "vendor-id", &val); + if (prop) + adapter->guest->vendor = val; + + prop = read_prop_dword(np, "device-id", &val); + if (prop) + adapter->guest->device = val; + + if (cxl_verbose) { + read_prop_dword(np, "ibm,privileged-facility", &val); + read_prop_dword(np, "revision-id", &val); + read_prop_dword(np, "class-code", &val); + } + + prop = read_prop_dword(np, "subsystem-vendor-id", &val); + if (prop) + adapter->guest->subsystem_vendor = val; + + prop = read_prop_dword(np, "subsystem-id", &val); + if (prop) + adapter->guest->subsystem = val; + + if (cxl_verbose) + read_vpd(adapter, NULL); + + return 0; +} + +static int cxl_of_remove(struct platform_device *pdev) +{ + struct cxl *adapter; + int afu; + + adapter = dev_get_drvdata(&pdev->dev); + for (afu = 0; afu < adapter->slices; afu++) + cxl_guest_remove_afu(adapter->afu[afu]); + + cxl_guest_remove_adapter(adapter); + return 0; +} + +static void cxl_of_shutdown(struct platform_device *pdev) +{ + cxl_of_remove(pdev); +} + +int cxl_of_probe(struct platform_device *pdev) +{ + struct device_node *np = NULL; + struct device_node *afu_np = NULL; + struct cxl *adapter = NULL; + int ret; + int slice, slice_ok; + + pr_devel("in %s\n", __func__); + + np = pdev->dev.of_node; + if (np == NULL) + return -ENODEV; + + /* init adapter */ + adapter = cxl_guest_init_adapter(np, pdev); + if (IS_ERR(adapter)) { + dev_err(&pdev->dev, "guest_init_adapter failed: %li\n", PTR_ERR(adapter)); + return PTR_ERR(adapter); + } + + /* init afu */ + slice_ok = 0; + for (afu_np = NULL, slice = 0; (afu_np = of_get_next_child(np, afu_np)); slice++) { + if ((ret = cxl_guest_init_afu(adapter, slice, afu_np))) + dev_err(&pdev->dev, "AFU %i failed to initialise: %i\n", + slice, ret); + else + slice_ok++; + } + + if (slice_ok == 0) { + dev_info(&pdev->dev, "No active AFU"); + adapter->slices = 0; + } + + if (afu_np) + of_node_put(afu_np); + return 0; +} + +static const struct of_device_id cxl_of_match[] = { + { .compatible = "ibm,coherent-platform-facility",}, + {}, +}; +MODULE_DEVICE_TABLE(of, cxl_of_match); + +struct platform_driver cxl_of_driver = { + .driver = { + .name = "cxl_of", + .of_match_table = cxl_of_match, + .owner = THIS_MODULE + }, + .probe = cxl_of_probe, + .remove = cxl_of_remove, + .shutdown = cxl_of_shutdown, +}; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 4c1903f781fc..6634b7a3c42b 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -90,8 +90,8 @@ /* This works a little different than the p1/p2 register accesses to make it * easier to pull out individual fields */ -#define AFUD_READ(afu, off) in_be64(afu->afu_desc_mmio + off) -#define AFUD_READ_LE(afu, off) in_le64(afu->afu_desc_mmio + off) +#define AFUD_READ(afu, off) in_be64(afu->native->afu_desc_mmio + off) +#define AFUD_READ_LE(afu, off) in_le64(afu->native->afu_desc_mmio + off) #define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) #define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) @@ -116,24 +116,6 @@ #define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) #define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48) -u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off) -{ - u64 aligned_off = off & ~0x3L; - u32 val; - - val = cxl_afu_cr_read32(afu, cr, aligned_off); - return (val >> ((off & 0x2) * 8)) & 0xffff; -} - -u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off) -{ - u64 aligned_off = off & ~0x3L; - u32 val; - - val = cxl_afu_cr_read32(afu, cr, aligned_off); - return (val >> ((off & 0x3) * 8)) & 0xff; -} - static const struct pci_device_id cxl_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, @@ -433,8 +415,8 @@ static int init_implementation_afu_regs(struct cxl_afu *afu) return 0; } -int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, - unsigned int virq) +int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq, + unsigned int virq) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); @@ -476,28 +458,30 @@ int cxl_update_image_control(struct cxl *adapter) return 0; } -int cxl_alloc_one_irq(struct cxl *adapter) +int cxl_pci_alloc_one_irq(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); return pnv_cxl_alloc_hwirqs(dev, 1); } -void cxl_release_one_irq(struct cxl *adapter, int hwirq) +void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); return pnv_cxl_release_hwirqs(dev, hwirq, 1); } -int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num) +int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter, unsigned int num) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num); } -void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter) +void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs, + struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); @@ -558,7 +542,7 @@ static int switch_card_to_cxl(struct pci_dev *dev) return 0; } -static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) { u64 p1n_base, p2n_base, afu_desc; const u64 p1n_size = 0x100; @@ -566,15 +550,15 @@ static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct p p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size); p2n_base = p2_base(dev) + (afu->slice * p2n_size); - afu->psn_phys = p2_base(dev) + (adapter->ps_off + (afu->slice * adapter->ps_size)); - afu_desc = p2_base(dev) + adapter->afu_desc_off + (afu->slice * adapter->afu_desc_size); + afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size)); + afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size); - if (!(afu->p1n_mmio = ioremap(p1n_base, p1n_size))) + if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size))) goto err; if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size))) goto err1; if (afu_desc) { - if (!(afu->afu_desc_mmio = ioremap(afu_desc, adapter->afu_desc_size))) + if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size))) goto err2; } @@ -582,62 +566,41 @@ static int cxl_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct p err2: iounmap(afu->p2n_mmio); err1: - iounmap(afu->p1n_mmio); + iounmap(afu->native->p1n_mmio); err: dev_err(&afu->dev, "Error mapping AFU MMIO regions\n"); return -ENOMEM; } -static void cxl_unmap_slice_regs(struct cxl_afu *afu) +static void pci_unmap_slice_regs(struct cxl_afu *afu) { if (afu->p2n_mmio) { iounmap(afu->p2n_mmio); afu->p2n_mmio = NULL; } - if (afu->p1n_mmio) { - iounmap(afu->p1n_mmio); - afu->p1n_mmio = NULL; + if (afu->native->p1n_mmio) { + iounmap(afu->native->p1n_mmio); + afu->native->p1n_mmio = NULL; } - if (afu->afu_desc_mmio) { - iounmap(afu->afu_desc_mmio); - afu->afu_desc_mmio = NULL; + if (afu->native->afu_desc_mmio) { + iounmap(afu->native->afu_desc_mmio); + afu->native->afu_desc_mmio = NULL; } } -static void cxl_release_afu(struct device *dev) +void cxl_pci_release_afu(struct device *dev) { struct cxl_afu *afu = to_cxl_afu(dev); - pr_devel("cxl_release_afu\n"); + pr_devel("%s\n", __func__); idr_destroy(&afu->contexts_idr); cxl_release_spa(afu); + kfree(afu->native); kfree(afu); } -static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice) -{ - struct cxl_afu *afu; - - if (!(afu = kzalloc(sizeof(struct cxl_afu), GFP_KERNEL))) - return NULL; - - afu->adapter = adapter; - afu->dev.parent = &adapter->dev; - afu->dev.release = cxl_release_afu; - afu->slice = slice; - idr_init(&afu->contexts_idr); - mutex_init(&afu->contexts_lock); - spin_lock_init(&afu->afu_cntl_lock); - mutex_init(&afu->spa_mutex); - - afu->prefault_mode = CXL_PREFAULT_NONE; - afu->irqs_max = afu->adapter->user_irqs; - - return afu; -} - /* Expects AFU struct to have recently been zeroed out */ static int cxl_read_afu_descriptor(struct cxl_afu *afu) { @@ -659,7 +622,7 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) afu->pp_size = AFUD_PPPSA_LEN(val) * 4096; afu->psa = AFUD_PPPSA_PSA(val); if ((afu->pp_psa = AFUD_PPPSA_PP(val))) - afu->pp_offset = AFUD_READ_PPPSA_OFF(afu); + afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu); val = AFUD_READ_CR(afu); afu->crs_len = AFUD_CR_LEN(val) * 256; @@ -686,10 +649,11 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) { - int i; + int i, rc; + u32 val; if (afu->psa && afu->adapter->ps_size < - (afu->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { + (afu->native->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); return -ENODEV; } @@ -698,7 +662,8 @@ static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!"); for (i = 0; i < afu->crs_num; i++) { - if ((cxl_afu_cr_read32(afu, i, 0) == 0)) { + rc = cxl_ops->afu_cr_read32(afu, i, 0, &val); + if (rc || val == 0) { dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i); return -EINVAL; } @@ -719,7 +684,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu) reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg); - if (__cxl_afu_reset(afu)) + if (cxl_ops->afu_reset(afu)) return -EIO; if (cxl_afu_disable(afu)) return -EIO; @@ -767,13 +732,13 @@ static int sanitise_afu_regs(struct cxl_afu *afu) * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte * aligned the function uses a bounce buffer which can be max PAGE_SIZE. */ -ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, +ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count) { loff_t aligned_start, aligned_end; size_t aligned_length; void *tbuf; - const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset; + const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset; if (count == 0 || off < 0 || (size_t)off >= afu->eb_len) return 0; @@ -804,18 +769,18 @@ ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, return count; } -static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) +static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev) { int rc; - if ((rc = cxl_map_slice_regs(afu, adapter, dev))) + if ((rc = pci_map_slice_regs(afu, adapter, dev))) return rc; if ((rc = sanitise_afu_regs(afu))) goto err1; /* We need to reset the AFU before we can read the AFU descriptor */ - if ((rc = __cxl_afu_reset(afu))) + if ((rc = cxl_ops->afu_reset(afu))) goto err1; if (cxl_verbose) @@ -830,44 +795,50 @@ static int cxl_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc if ((rc = init_implementation_afu_regs(afu))) goto err1; - if ((rc = cxl_register_serr_irq(afu))) + if ((rc = cxl_native_register_serr_irq(afu))) goto err1; - if ((rc = cxl_register_psl_irq(afu))) + if ((rc = cxl_native_register_psl_irq(afu))) goto err2; return 0; err2: - cxl_release_serr_irq(afu); + cxl_native_release_serr_irq(afu); err1: - cxl_unmap_slice_regs(afu); + pci_unmap_slice_regs(afu); return rc; } -static void cxl_deconfigure_afu(struct cxl_afu *afu) +static void pci_deconfigure_afu(struct cxl_afu *afu) { - cxl_release_psl_irq(afu); - cxl_release_serr_irq(afu); - cxl_unmap_slice_regs(afu); + cxl_native_release_psl_irq(afu); + cxl_native_release_serr_irq(afu); + pci_unmap_slice_regs(afu); } -static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) +static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) { struct cxl_afu *afu; - int rc; + int rc = -ENOMEM; afu = cxl_alloc_afu(adapter, slice); if (!afu) return -ENOMEM; + afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL); + if (!afu->native) + goto err_free_afu; + + mutex_init(&afu->native->spa_mutex); + rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice); if (rc) - goto err_free; + goto err_free_native; - rc = cxl_configure_afu(afu, adapter, dev); + rc = pci_configure_afu(afu, adapter, dev); if (rc) - goto err_free; + goto err_free_native; /* Don't care if this fails */ cxl_debugfs_afu_add(afu); @@ -890,24 +861,27 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) return 0; err_put1: - cxl_deconfigure_afu(afu); + pci_deconfigure_afu(afu); cxl_debugfs_afu_remove(afu); device_unregister(&afu->dev); return rc; -err_free: +err_free_native: + kfree(afu->native); +err_free_afu: kfree(afu); return rc; } -static void cxl_remove_afu(struct cxl_afu *afu) +static void cxl_pci_remove_afu(struct cxl_afu *afu) { - pr_devel("cxl_remove_afu\n"); + pr_devel("%s\n", __func__); if (!afu) return; + cxl_pci_vphb_remove(afu); cxl_sysfs_afu_remove(afu); cxl_debugfs_afu_remove(afu); @@ -916,13 +890,13 @@ static void cxl_remove_afu(struct cxl_afu *afu) spin_unlock(&afu->adapter->afu_list_lock); cxl_context_detach_all(afu); - cxl_afu_deactivate_mode(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); - cxl_deconfigure_afu(afu); + pci_deconfigure_afu(afu); device_unregister(&afu->dev); } -int cxl_reset(struct cxl *adapter) +int cxl_pci_reset(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); int rc; @@ -956,17 +930,17 @@ static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx", p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev)); - if (!(adapter->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) + if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev)))) goto err3; - if (!(adapter->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) + if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev)))) goto err4; return 0; err4: - iounmap(adapter->p1_mmio); - adapter->p1_mmio = NULL; + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; err3: pci_release_region(dev, 0); err2: @@ -977,14 +951,14 @@ err1: static void cxl_unmap_adapter_regs(struct cxl *adapter) { - if (adapter->p1_mmio) { - iounmap(adapter->p1_mmio); - adapter->p1_mmio = NULL; + if (adapter->native->p1_mmio) { + iounmap(adapter->native->p1_mmio); + adapter->native->p1_mmio = NULL; pci_release_region(to_pci_dev(adapter->dev.parent), 2); } - if (adapter->p2_mmio) { - iounmap(adapter->p2_mmio); - adapter->p2_mmio = NULL; + if (adapter->native->p2_mmio) { + iounmap(adapter->native->p2_mmio); + adapter->native->p2_mmio = NULL; pci_release_region(to_pci_dev(adapter->dev.parent), 0); } } @@ -1025,10 +999,10 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) /* Convert everything to bytes, because there is NO WAY I'd look at the * code a month later and forget what units these are in ;-) */ - adapter->ps_off = ps_off * 64 * 1024; + adapter->native->ps_off = ps_off * 64 * 1024; adapter->ps_size = ps_size * 64 * 1024; - adapter->afu_desc_off = afu_desc_off * 64 * 1024; - adapter->afu_desc_size = afu_desc_size *64 * 1024; + adapter->native->afu_desc_off = afu_desc_off * 64 * 1024; + adapter->native->afu_desc_size = afu_desc_size * 64 * 1024; /* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */ adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2*adapter->slices; @@ -1079,21 +1053,26 @@ static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) return -EINVAL; } - if (!adapter->afu_desc_off || !adapter->afu_desc_size) { + if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) { dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n"); return -EINVAL; } - if (adapter->ps_size > p2_size(dev) - adapter->ps_off) { + if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) { dev_err(&dev->dev, "ABORTING: Problem state size larger than " "available in BAR2: 0x%llx > 0x%llx\n", - adapter->ps_size, p2_size(dev) - adapter->ps_off); + adapter->ps_size, p2_size(dev) - adapter->native->ps_off); return -EINVAL; } return 0; } +ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len) +{ + return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf); +} + static void cxl_release_adapter(struct device *dev) { struct cxl *adapter = to_cxl_adapter(dev); @@ -1102,33 +1081,10 @@ static void cxl_release_adapter(struct device *dev) cxl_remove_adapter_nr(adapter); + kfree(adapter->native); kfree(adapter); } -static struct cxl *cxl_alloc_adapter(void) -{ - struct cxl *adapter; - - if (!(adapter = kzalloc(sizeof(struct cxl), GFP_KERNEL))) - return NULL; - - spin_lock_init(&adapter->afu_list_lock); - - if (cxl_alloc_adapter_nr(adapter)) - goto err1; - - if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) - goto err2; - - return adapter; - -err2: - cxl_remove_adapter_nr(adapter); -err1: - kfree(adapter); - return NULL; -} - #define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31)) static int sanitise_adapter_regs(struct cxl *adapter) @@ -1192,7 +1148,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = cxl_setup_psl_timebase(adapter, dev))) goto err; - if ((rc = cxl_register_psl_err_irq(adapter))) + if ((rc = cxl_native_register_psl_err_irq(adapter))) goto err; return 0; @@ -1207,13 +1163,13 @@ static void cxl_deconfigure_adapter(struct cxl *adapter) { struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); - cxl_release_psl_err_irq(adapter); + cxl_native_release_psl_err_irq(adapter); cxl_unmap_adapter_regs(adapter); pci_disable_device(pdev); } -static struct cxl *cxl_init_adapter(struct pci_dev *dev) +static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) { struct cxl *adapter; int rc; @@ -1222,6 +1178,12 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) if (!adapter) return ERR_PTR(-ENOMEM); + adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL); + if (!adapter->native) { + rc = -ENOMEM; + goto err_release; + } + /* Set defaults for parameters which need to persist over * configure/reconfigure */ @@ -1231,8 +1193,7 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) rc = cxl_configure_adapter(adapter, dev); if (rc) { pci_disable_device(dev); - cxl_release_adapter(&adapter->dev); - return ERR_PTR(rc); + goto err_release; } /* Don't care if this one fails: */ @@ -1258,9 +1219,13 @@ err_put1: cxl_deconfigure_adapter(adapter); device_unregister(&adapter->dev); return ERR_PTR(rc); + +err_release: + cxl_release_adapter(&adapter->dev); + return ERR_PTR(rc); } -static void cxl_remove_adapter(struct cxl *adapter) +static void cxl_pci_remove_adapter(struct cxl *adapter) { pr_devel("cxl_remove_adapter\n"); @@ -1278,17 +1243,22 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; + if (cxl_pci_is_vphb_device(dev)) { + dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n"); + return -ENODEV; + } + if (cxl_verbose) dump_cxl_config_space(dev); - adapter = cxl_init_adapter(dev); + adapter = cxl_pci_init_adapter(dev); if (IS_ERR(adapter)) { dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); return PTR_ERR(adapter); } for (slice = 0; slice < adapter->slices; slice++) { - if ((rc = cxl_init_afu(adapter, slice, dev))) { + if ((rc = pci_init_afu(adapter, slice, dev))) { dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc); continue; } @@ -1313,10 +1283,9 @@ static void cxl_remove(struct pci_dev *dev) */ for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - cxl_pci_vphb_remove(afu); - cxl_remove_afu(afu); + cxl_pci_remove_afu(afu); } - cxl_remove_adapter(adapter); + cxl_pci_remove_adapter(adapter); } static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu, @@ -1462,8 +1431,8 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, return result; cxl_context_detach_all(afu); - cxl_afu_deactivate_mode(afu); - cxl_deconfigure_afu(afu); + cxl_ops->afu_deactivate_mode(afu, afu->current_mode); + pci_deconfigure_afu(afu); } cxl_deconfigure_adapter(adapter); @@ -1486,14 +1455,12 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - if (cxl_configure_afu(afu, adapter, pdev)) + if (pci_configure_afu(afu, adapter, pdev)) goto err; if (cxl_afu_select_best_mode(afu)) goto err; - cxl_pci_vphb_reconfigure(afu); - list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) { /* Reset the device context. * TODO: make this less disruptive @@ -1509,7 +1476,7 @@ static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev) afu_dev->dev.archdata.cxl_ctx = ctx; - if (cxl_afu_check_and_enable(afu)) + if (cxl_ops->afu_check_and_enable(afu)) goto err; afu_dev->error_state = pci_channel_io_normal; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 02006f7109a8..907c99b9f5af 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -69,7 +69,7 @@ static ssize_t reset_adapter_store(struct device *device, if ((rc != 1) || (val != 1)) return -EINVAL; - if ((rc = cxl_reset(adapter))) + if ((rc = cxl_ops->adapter_reset(adapter))) return rc; return count; } @@ -165,7 +165,7 @@ static ssize_t pp_mmio_off_show(struct device *device, { struct cxl_afu *afu = to_afu_chardev_m(device); - return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->pp_offset); + return scnprintf(buf, PAGE_SIZE, "%llu\n", afu->native->pp_offset); } static ssize_t pp_mmio_len_show(struct device *device, @@ -211,7 +211,7 @@ static ssize_t reset_store_afu(struct device *device, goto err; } - if ((rc = __cxl_afu_reset(afu))) + if ((rc = cxl_ops->afu_reset(afu))) goto err; rc = count; @@ -253,8 +253,14 @@ static ssize_t irqs_max_store(struct device *device, if (irqs_max < afu->pp_irqs) return -EINVAL; - if (irqs_max > afu->adapter->user_irqs) - return -EINVAL; + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (irqs_max > afu->adapter->user_irqs) + return -EINVAL; + } else { + /* pHyp sets a per-AFU limit */ + if (irqs_max > afu->guest->max_ints) + return -EINVAL; + } afu->irqs_max = irqs_max; return count; @@ -348,7 +354,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr, } /* - * cxl_afu_deactivate_mode needs to be done outside the lock, prevent + * afu_deactivate_mode needs to be done outside the lock, prevent * other contexts coming in before we are ready: */ old_mode = afu->current_mode; @@ -357,9 +363,9 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr, mutex_unlock(&afu->contexts_lock); - if ((rc = _cxl_afu_deactivate_mode(afu, old_mode))) + if ((rc = cxl_ops->afu_deactivate_mode(afu, old_mode))) return rc; - if ((rc = cxl_afu_activate_mode(afu, mode))) + if ((rc = cxl_ops->afu_activate_mode(afu, mode))) return rc; return count; @@ -389,7 +395,7 @@ static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj, struct cxl_afu *afu = to_cxl_afu(container_of(kobj, struct device, kobj)); - return cxl_afu_read_err_buffer(afu, buf, off, count); + return cxl_ops->afu_read_err_buffer(afu, buf, off, count); } static struct device_attribute afu_attrs[] = { @@ -406,24 +412,39 @@ static struct device_attribute afu_attrs[] = { int cxl_sysfs_adapter_add(struct cxl *adapter) { + struct device_attribute *dev_attr; int i, rc; for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { - if ((rc = device_create_file(&adapter->dev, &adapter_attrs[i]))) - goto err; + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) { + if ((rc = device_create_file(&adapter->dev, dev_attr))) + goto err; + } } return 0; err: - for (i--; i >= 0; i--) - device_remove_file(&adapter->dev, &adapter_attrs[i]); + for (i--; i >= 0; i--) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } return rc; } + void cxl_sysfs_adapter_remove(struct cxl *adapter) { + struct device_attribute *dev_attr; int i; - for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) - device_remove_file(&adapter->dev, &adapter_attrs[i]); + for (i = 0; i < ARRAY_SIZE(adapter_attrs); i++) { + dev_attr = &adapter_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_ADAPTER_ATTRS)) + device_remove_file(&adapter->dev, dev_attr); + } } struct afu_config_record { @@ -469,10 +490,12 @@ static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, struct afu_config_record *cr = to_cr(kobj); struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj)); - u64 i, j, val; + u64 i, j, val, rc; for (i = 0; i < count;) { - val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7); + rc = cxl_ops->afu_cr_read64(afu, cr->cr, off & ~0x7, &val); + if (rc) + val = ~0ULL; for (j = off & 0x7; j < 8 && i < count; i++, j++, off++) buf[i] = (val >> (j * 8)) & 0xff; } @@ -517,14 +540,22 @@ static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int c return ERR_PTR(-ENOMEM); cr->cr = cr_idx; - cr->device = cxl_afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID); - cr->vendor = cxl_afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID); - cr->class = cxl_afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION) >> 8; + + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID, &cr->device); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID, &cr->vendor); + if (rc) + goto err; + rc = cxl_ops->afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION, &cr->class); + if (rc) + goto err; + cr->class >>= 8; /* * Export raw AFU PCIe like config record. For now this is read only by * root - we can expand that later to be readable by non-root and maybe - * even writable provided we have a good use-case. Once we suport + * even writable provided we have a good use-case. Once we support * exposing AFUs through a virtual PHB they will get that for free from * Linux' PCI infrastructure, but until then it's not clear that we * need it for anything since the main use case is just identifying @@ -562,6 +593,7 @@ err: void cxl_sysfs_afu_remove(struct cxl_afu *afu) { + struct device_attribute *dev_attr; struct afu_config_record *cr, *tmp; int i; @@ -569,8 +601,12 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu) if (afu->eb_len) device_remove_bin_file(&afu->dev, &afu->attr_eb); - for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) - device_remove_file(&afu->dev, &afu_attrs[i]); + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) + device_remove_file(&afu->dev, &afu_attrs[i]); + } list_for_each_entry_safe(cr, tmp, &afu->crs, list) { sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); @@ -580,14 +616,19 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu) int cxl_sysfs_afu_add(struct cxl_afu *afu) { + struct device_attribute *dev_attr; struct afu_config_record *cr; int i, rc; INIT_LIST_HEAD(&afu->crs); for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { - if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) - goto err; + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) { + if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) + goto err; + } } /* conditionally create the add the binary file for error info buffer */ @@ -626,32 +667,50 @@ err: /* reset the eb_len as we havent created the bin attr */ afu->eb_len = 0; - for (i--; i >= 0; i--) + for (i--; i >= 0; i--) { + dev_attr = &afu_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_ATTRS)) device_remove_file(&afu->dev, &afu_attrs[i]); + } return rc; } int cxl_sysfs_afu_m_add(struct cxl_afu *afu) { + struct device_attribute *dev_attr; int i, rc; for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { - if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) - goto err; + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) { + if ((rc = device_create_file(afu->chardev_m, &afu_master_attrs[i]))) + goto err; + } } return 0; err: - for (i--; i >= 0; i--) - device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + for (i--; i >= 0; i--) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } return rc; } void cxl_sysfs_afu_m_remove(struct cxl_afu *afu) { + struct device_attribute *dev_attr; int i; - for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) - device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + for (i = 0; i < ARRAY_SIZE(afu_master_attrs); i++) { + dev_attr = &afu_master_attrs[i]; + if (cxl_ops->support_attributes(dev_attr->attr.name, + CXL_AFU_MASTER_ATTRS)) + device_remove_file(afu->chardev_m, &afu_master_attrs[i]); + } } diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h index 6e1e2adfba8e..751d6119683e 100644 --- a/drivers/misc/cxl/trace.h +++ b/drivers/misc/cxl/trace.h @@ -450,6 +450,199 @@ DEFINE_EVENT(cxl_pe_class, cxl_slbia, TP_ARGS(ctx) ); +TRACE_EVENT(cxl_hcall, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + + TP_ARGS(unit_address, process_token, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%016llx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_control, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(char *, fct) + __field(u64, p1) + __field(u64, p2) + __field(u64, p3) + __field(u64, p4) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->p3 = p3; + __entry->p4 = p4; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=%#.16llx %s(%#llx, %#llx, %#llx, %#llx, R4: %#lx)): %li", + __entry->unit_address, + __entry->fct, + __entry->p1, + __entry->p2, + __entry->p3, + __entry->p4, + __entry->r4, + __entry->rc + ) +); + +TRACE_EVENT(cxl_hcall_attach, + TP_PROTO(u64 unit_address, u64 phys_addr, unsigned long process_token, + unsigned long mmio_addr, unsigned long mmio_size, long rc), + + TP_ARGS(unit_address, phys_addr, process_token, + mmio_addr, mmio_size, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, phys_addr) + __field(unsigned long, process_token) + __field(unsigned long, mmio_addr) + __field(unsigned long, mmio_size) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->phys_addr = phys_addr; + __entry->process_token = process_token; + __entry->mmio_addr = mmio_addr; + __entry->mmio_size = mmio_size; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx phys_addr=0x%016llx " + "token=0x%.8lx mmio_addr=0x%lx mmio_size=0x%lx rc=%li", + __entry->unit_address, + __entry->phys_addr, + __entry->process_token, + __entry->mmio_addr, + __entry->mmio_size, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_detach, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_function, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +DEFINE_EVENT(cxl_hcall, cxl_hcall_collect_int_info, + TP_PROTO(u64 unit_address, u64 process_token, long rc), + TP_ARGS(unit_address, process_token, rc) +); + +TRACE_EVENT(cxl_hcall_control_faults, + TP_PROTO(u64 unit_address, u64 process_token, + u64 control_mask, u64 reset_mask, unsigned long r4, + long rc), + + TP_ARGS(unit_address, process_token, + control_mask, reset_mask, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(u64, process_token) + __field(u64, control_mask) + __field(u64, reset_mask) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->process_token = process_token; + __entry->control_mask = control_mask; + __entry->reset_mask = reset_mask; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("unit_address=0x%016llx process_token=0x%llx " + "control_mask=%#llx reset_mask=%#llx r4=%#lx rc=%li", + __entry->unit_address, + __entry->process_token, + __entry->control_mask, + __entry->reset_mask, + __entry->r4, + __entry->rc + ) +); + +DEFINE_EVENT(cxl_hcall_control, cxl_hcall_control_facility, + TP_PROTO(u64 unit_address, char *fct, u64 p1, u64 p2, u64 p3, + u64 p4, unsigned long r4, long rc), + TP_ARGS(unit_address, fct, p1, p2, p3, p4, r4, rc) +); + +TRACE_EVENT(cxl_hcall_download_facility, + TP_PROTO(u64 unit_address, char *fct, u64 list_address, u64 num, + unsigned long r4, long rc), + + TP_ARGS(unit_address, fct, list_address, num, r4, rc), + + TP_STRUCT__entry( + __field(u64, unit_address) + __field(char *, fct) + __field(u64, list_address) + __field(u64, num) + __field(unsigned long, r4) + __field(long, rc) + ), + + TP_fast_assign( + __entry->unit_address = unit_address; + __entry->fct = fct; + __entry->list_address = list_address; + __entry->num = num; + __entry->r4 = r4; + __entry->rc = rc; + ), + + TP_printk("%#.16llx, %s(%#llx, %#llx), %#lx): %li", + __entry->unit_address, + __entry->fct, + __entry->list_address, + __entry->num, + __entry->r4, + __entry->rc + ) +); + #endif /* _CXL_TRACE_H */ /* This part must be outside protection */ diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index cbd4331fb45c..cdc7723b845d 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -49,7 +49,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; - if (!cxl_adapter_link_ok(afu->adapter)) { + if (!cxl_ops->link_ok(afu->adapter, afu)) { dev_warn(&dev->dev, "%s: Device link is down, refusing to enable AFU\n", __func__); return false; } @@ -66,7 +66,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) return false; dev->dev.archdata.cxl_ctx = ctx; - return (cxl_afu_check_and_enable(afu) == 0); + return (cxl_ops->afu_check_and_enable(afu) == 0); } static void cxl_pci_disable_device(struct pci_dev *dev) @@ -99,113 +99,90 @@ static int cxl_pcie_cfg_record(u8 bus, u8 devfn) return (bus << 8) + devfn; } -static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb, - u8 bus, u8 devfn, int offset) -{ - int record = cxl_pcie_cfg_record(bus, devfn); - - return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset; -} - - static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, - int offset, int len, - volatile void __iomem **ioaddr, - u32 *mask, int *shift) + struct cxl_afu **_afu, int *_record) { struct pci_controller *phb; struct cxl_afu *afu; - unsigned long addr; + int record; phb = pci_bus_to_host(bus); if (phb == NULL) return PCIBIOS_DEVICE_NOT_FOUND; - afu = (struct cxl_afu *)phb->private_data; - if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) + afu = (struct cxl_afu *)phb->private_data; + record = cxl_pcie_cfg_record(bus->number, devfn); + if (record > afu->crs_num) return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= (unsigned long)phb->cfg_data) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset); - *ioaddr = (void *)(addr & ~0x3ULL); - *shift = ((addr & 0x3) * 8); - switch (len) { - case 1: - *mask = 0xff; - break; - case 2: - *mask = 0xffff; - break; - default: - *mask = 0xffffffff; - break; - } + *_afu = afu; + *_record = record; return 0; } - -static inline bool cxl_config_link_ok(struct pci_bus *bus) -{ - struct pci_controller *phb; - struct cxl_afu *afu; - - /* Config space IO is based on phb->cfg_addr, which is based on - * afu_desc_mmio. This isn't safe to read/write when the link - * goes down, as EEH tears down MMIO space. - * - * Check if the link is OK before proceeding. - */ - - phb = pci_bus_to_host(bus); - if (phb == NULL) - return false; - afu = (struct cxl_afu *)phb->private_data; - return cxl_adapter_link_ok(afu->adapter); -} - static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 *val) { - volatile void __iomem *ioaddr; - int shift, rc; - u32 mask; + int rc, record; + struct cxl_afu *afu; + u8 val8; + u16 val16; + u32 val32; - rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, - &mask, &shift); + rc = cxl_pcie_config_info(bus, devfn, &afu, &record); if (rc) return rc; - if (!cxl_config_link_ok(bus)) + switch (len) { + case 1: + rc = cxl_ops->afu_cr_read8(afu, record, offset, &val8); + *val = val8; + break; + case 2: + rc = cxl_ops->afu_cr_read16(afu, record, offset, &val16); + *val = val16; + break; + case 4: + rc = cxl_ops->afu_cr_read32(afu, record, offset, &val32); + *val = val32; + break; + default: + WARN_ON(1); + } + + if (rc) return PCIBIOS_DEVICE_NOT_FOUND; - /* Can only read 32 bits */ - *val = (in_le32(ioaddr) >> shift) & mask; return PCIBIOS_SUCCESSFUL; } static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val) { - volatile void __iomem *ioaddr; - u32 v, mask; - int shift, rc; + int rc, record; + struct cxl_afu *afu; - rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, - &mask, &shift); + rc = cxl_pcie_config_info(bus, devfn, &afu, &record); if (rc) return rc; - if (!cxl_config_link_ok(bus)) - return PCIBIOS_DEVICE_NOT_FOUND; - - /* Can only write 32 bits so do read-modify-write */ - mask <<= shift; - val <<= shift; + switch (len) { + case 1: + rc = cxl_ops->afu_cr_write8(afu, record, offset, val & 0xff); + break; + case 2: + rc = cxl_ops->afu_cr_write16(afu, record, offset, val & 0xffff); + break; + case 4: + rc = cxl_ops->afu_cr_write32(afu, record, offset, val); + break; + default: + WARN_ON(1); + } - v = (in_le32(ioaddr) & ~mask) | (val & mask); + if (rc) + return PCIBIOS_SET_FAILED; - out_le32(ioaddr, v); return PCIBIOS_SUCCESSFUL; } @@ -233,23 +210,31 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) { struct pci_dev *phys_dev; struct pci_controller *phb, *phys_phb; - - phys_dev = to_pci_dev(afu->adapter->dev.parent); - phys_phb = pci_bus_to_host(phys_dev->bus); + struct device_node *vphb_dn; + struct device *parent; + + if (cpu_has_feature(CPU_FTR_HVMODE)) { + phys_dev = to_pci_dev(afu->adapter->dev.parent); + phys_phb = pci_bus_to_host(phys_dev->bus); + vphb_dn = phys_phb->dn; + parent = &phys_dev->dev; + } else { + vphb_dn = afu->adapter->dev.parent->of_node; + parent = afu->adapter->dev.parent; + } /* Alloc and setup PHB data structure */ - phb = pcibios_alloc_controller(phys_phb->dn); - + phb = pcibios_alloc_controller(vphb_dn); if (!phb) return -ENODEV; /* Setup parent in sysfs */ - phb->parent = &phys_dev->dev; + phb->parent = parent; /* Setup the PHB using arch provided callback */ phb->ops = &cxl_pcie_pci_ops; - phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; - phb->cfg_data = (void *)(u64)afu->crs_len; + phb->cfg_addr = NULL; + phb->cfg_data = 0; phb->private_data = afu; phb->controller_ops = cxl_pci_controller_ops; @@ -272,15 +257,6 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) return 0; } -void cxl_pci_vphb_reconfigure(struct cxl_afu *afu) -{ - /* When we are reconfigured, the AFU's MMIO space is unmapped - * and remapped. We need to reflect this in the PHB's view of - * the world. - */ - afu->phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; -} - void cxl_pci_vphb_remove(struct cxl_afu *afu) { struct pci_controller *phb; @@ -296,6 +272,15 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) pcibios_free_controller(phb); } +bool cxl_pci_is_vphb_device(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (phb->ops == &cxl_pcie_pci_ops); +} + struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) { struct pci_controller *phb; diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 89b3befc7155..6469ff6208b0 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -271,6 +271,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx) void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { } +void __weak pcibios_bus_add_device(struct pci_dev *pdev) { } + /** * pci_bus_add_device - start driver for a single device * @dev: device to add @@ -285,6 +287,7 @@ void pci_bus_add_device(struct pci_dev *dev) * Can not put in pci_device_add yet because resources * are not assigned yet for some devices. */ + pcibios_bus_add_device(dev); pci_fixup_device(pci_fixup_final, dev); pci_create_sysfs_dev_files(dev); pci_proc_attach_device(dev); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 31f31d460fc9..fa4f13869fa9 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -113,7 +113,7 @@ resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) return dev->sriov->barsz[resno - PCI_IOV_RESOURCES]; } -static int virtfn_add(struct pci_dev *dev, int id, int reset) +int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) { int i; int rc = -ENOMEM; @@ -188,7 +188,7 @@ failed: return rc; } -static void virtfn_remove(struct pci_dev *dev, int id, int reset) +void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset) { char buf[VIRTFN_ID_LEN]; struct pci_dev *virtfn; @@ -321,7 +321,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) } for (i = 0; i < initial; i++) { - rc = virtfn_add(dev, i, 0); + rc = pci_iov_add_virtfn(dev, i, 0); if (rc) goto failed; } @@ -333,7 +333,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) failed: while (i--) - virtfn_remove(dev, i, 0); + pci_iov_remove_virtfn(dev, i, 0); pcibios_sriov_disable(dev); err_pcibios: @@ -359,7 +359,7 @@ static void sriov_disable(struct pci_dev *dev) return; for (i = 0; i < iov->num_VFs; i++) - virtfn_remove(dev, i, 0); + pci_iov_remove_virtfn(dev, i, 0); pcibios_sriov_disable(dev); diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h index 5ada9268a450..580f37006977 100644 --- a/drivers/scsi/cxlflash/common.h +++ b/drivers/scsi/cxlflash/common.h @@ -106,7 +106,6 @@ struct cxlflash_cfg { atomic_t scan_host_needed; struct cxl_afu *cxl_afu; - struct pci_dev *parent_dev; atomic_t recovery_threads; struct mutex ctx_recovery_mutex; diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index f6d90ce8f3b7..e04aae70643b 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -1407,7 +1407,7 @@ static int start_context(struct cxlflash_cfg *cfg) */ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[]) { - struct pci_dev *dev = cfg->parent_dev; + struct pci_dev *dev = cfg->dev; int rc = 0; int ro_start, ro_size, i, j, k; ssize_t vpd_size; @@ -1416,7 +1416,7 @@ static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[]) char *wwpn_vpd_tags[NUM_FC_PORTS] = { "V5", "V6" }; /* Get the VPD data from the device */ - vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data); + vpd_size = cxl_read_adapter_vpd(dev, vpd_data, sizeof(vpd_data)); if (unlikely(vpd_size <= 0)) { dev_err(&dev->dev, "%s: Unable to read VPD (size = %ld)\n", __func__, vpd_size); @@ -2392,7 +2392,6 @@ static int cxlflash_probe(struct pci_dev *pdev, { struct Scsi_Host *host; struct cxlflash_cfg *cfg = NULL; - struct device *phys_dev; struct dev_dependent_vals *ddv; int rc = 0; @@ -2458,19 +2457,6 @@ static int cxlflash_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, cfg); - /* - * Use the special service provided to look up the physical - * PCI device, since we are called on the probe of the virtual - * PCI host bus (vphb) - */ - phys_dev = cxl_get_phys_dev(pdev); - if (!dev_is_pci(phys_dev)) { - dev_err(&pdev->dev, "%s: not a pci dev\n", __func__); - rc = -ENODEV; - goto out_remove; - } - cfg->parent_dev = to_pci_dev(phys_dev); - cfg->cxl_afu = cxl_pci_to_afu(pdev); rc = init_pci(cfg); diff --git a/include/linux/pci.h b/include/linux/pci.h index 27df4a6585da..bc435d6293d2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -770,6 +770,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */ int no_pci_devices(void); void pcibios_resource_survey_bus(struct pci_bus *bus); +void pcibios_bus_add_device(struct pci_dev *pdev); void pcibios_add_bus(struct pci_bus *bus); void pcibios_remove_bus(struct pci_bus *bus); void pcibios_fixup_bus(struct pci_bus *); @@ -1738,6 +1739,8 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); +int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset); +void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset); int pci_num_vf(struct pci_dev *dev); int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); @@ -1754,6 +1757,12 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) } static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } +static inline int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) +{ + return -ENOSYS; +} +static inline void pci_iov_remove_virtfn(struct pci_dev *dev, + int id, int reset) { } static inline void pci_disable_sriov(struct pci_dev *dev) { } static inline int pci_num_vf(struct pci_dev *dev) { return 0; } static inline int pci_vfs_assigned(struct pci_dev *dev) diff --git a/include/misc/cxl.h b/include/misc/cxl.h index f2ffe5bd720d..7d5e2613c7b8 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -30,9 +30,6 @@ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); /* Get the AFU conf record number associated with a pci_dev */ unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev); -/* Get the physical device (ie. the PCIe card) which the AFU is attached */ -struct device *cxl_get_phys_dev(struct pci_dev *dev); - /* * Context lifetime overview: @@ -210,4 +207,9 @@ ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, void cxl_perst_reloads_same_image(struct cxl_afu *afu, bool perst_reloads_same_image); +/* + * Read the VPD for the card where the AFU resides + */ +ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count); + #endif /* _MISC_CXL_H */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 1e889aa8a36e..8cd334f99ddc 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -55,11 +55,35 @@ struct cxl_afu_id { __u64 reserved6; }; +/* base adapter image header is included in the image */ +#define CXL_AI_NEED_HEADER 0x0000000000000001ULL +#define CXL_AI_ALL CXL_AI_NEED_HEADER + +#define CXL_AI_HEADER_SIZE 128 +#define CXL_AI_BUFFER_SIZE 4096 +#define CXL_AI_MAX_ENTRIES 256 +#define CXL_AI_MAX_CHUNK_SIZE (CXL_AI_BUFFER_SIZE * CXL_AI_MAX_ENTRIES) + +struct cxl_adapter_image { + __u64 flags; + __u64 data; + __u64 len_data; + __u64 len_image; + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + __u64 reserved4; +}; + /* ioctl numbers */ #define CXL_MAGIC 0xCA +/* AFU devices */ #define CXL_IOCTL_START_WORK _IOW(CXL_MAGIC, 0x00, struct cxl_ioctl_start_work) #define CXL_IOCTL_GET_PROCESS_ELEMENT _IOR(CXL_MAGIC, 0x01, __u32) #define CXL_IOCTL_GET_AFU_ID _IOR(CXL_MAGIC, 0x02, struct cxl_afu_id) +/* adapter devices */ +#define CXL_IOCTL_DOWNLOAD_IMAGE _IOW(CXL_MAGIC, 0x0A, struct cxl_adapter_image) +#define CXL_IOCTL_VALIDATE_IMAGE _IOW(CXL_MAGIC, 0x0B, struct cxl_adapter_image) #define CXL_READ_MIN_SIZE 0x1000 /* 4K */ |